[llvm] r319758 - [X86][AVX512] Cleanup VPCMP scheduler classes

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 5 04:02:23 PST 2017


Author: rksimon
Date: Tue Dec  5 04:02:22 2017
New Revision: 319758

URL: http://llvm.org/viewvc/llvm-project?rev=319758&view=rev
Log:
[X86][AVX512] Cleanup VPCMP scheduler classes

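Move the hardcoded itineraries out to the instruction declarations (passed in as an OpndItins parameter) and tag the instructions with the matching scheduler classes. I'm not sure that IIC_SSE_ALU_F32P is the best schedule for integer comparisons, but I'm not going to change it right now.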

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
    llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=319758&r1=319757&r2=319758&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue Dec  5 04:02:22 2017
@@ -1952,19 +1952,19 @@ let Predicates = [HasAVX512] in {
 }
 
 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
-              X86VectorVTInfo _, bit IsCommutable> {
+              OpndItins itins, X86VectorVTInfo _, bit IsCommutable> {
   let isCommutable = IsCommutable in
   def rr : AVX512BI<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
-             IIC_SSE_ALU_F32P_RR>, EVEX_4V;
+             itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
   def rm : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                                      (_.VT (bitconvert (_.LdFrag addr:$src2)))))],
-             IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+             itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
   let isCommutable = IsCommutable in
   def rrk : AVX512BI<opc, MRMSrcReg,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
@@ -1972,7 +1972,7 @@ multiclass avx512_icmp_packed<bits<8> op
                           "$dst {${mask}}, $src1, $src2}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                    (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))],
-              IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
+              itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
   def rmk : AVX512BI<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
               !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
@@ -1981,19 +1981,19 @@ multiclass avx512_icmp_packed<bits<8> op
                                    (OpNode (_.VT _.RC:$src1),
                                        (_.VT (bitconvert
                                               (_.LdFrag addr:$src2))))))],
-              IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
+              itins.rm>, EVEX_4V, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
 }
 
 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
-              X86VectorVTInfo _, bit IsCommutable> :
-           avx512_icmp_packed<opc, OpcodeStr, OpNode, _, IsCommutable> {
+              OpndItins itins,  X86VectorVTInfo _, bit IsCommutable> :
+           avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, _, IsCommutable> {
   def rmb : AVX512BI<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
               !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                                     "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
               [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                               (X86VBroadcast (_.ScalarLdFrag addr:$src2))))],
-              IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
+              itins.rm>, EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
   def rmbk : AVX512BI<opc, MRMSrcMem,
                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                        _.ScalarMemOp:$src2),
@@ -2004,69 +2004,72 @@ multiclass avx512_icmp_packed_rmb<bits<8
                                       (OpNode (_.VT _.RC:$src1),
                                         (X86VBroadcast
                                           (_.ScalarLdFrag addr:$src2)))))],
-               IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
+               itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
+               Sched<[itins.Sched.Folded, ReadAfterLd]>;
 }
 
 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
-                                 bit IsCommutable = 0> {
+                                 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
+                                 Predicate prd, bit IsCommutable = 0> {
   let Predicates = [prd] in
-  defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info512,
+  defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info512,
                               IsCommutable>, EVEX_V512;
 
   let Predicates = [prd, HasVLX] in {
-    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info256,
+    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info256,
                                    IsCommutable>, EVEX_V256;
-    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info128,
+    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info128,
                                    IsCommutable>, EVEX_V128;
   }
 }
 
 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
-                                  SDNode OpNode, AVX512VLVectorVTInfo VTInfo,
-                                  Predicate prd, bit IsCommutable = 0> {
+                                     SDNode OpNode, OpndItins itins,
+                                     AVX512VLVectorVTInfo VTInfo,
+                                     Predicate prd, bit IsCommutable = 0> {
   let Predicates = [prd] in
-  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info512,
+  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info512,
                                   IsCommutable>, EVEX_V512;
 
   let Predicates = [prd, HasVLX] in {
-    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
+    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info256,
                                        IsCommutable>, EVEX_V256;
-    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
+    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info128,
                                        IsCommutable>, EVEX_V128;
   }
 }
 
+// FIXME: Is there a better scheduler itinerary for VPCMP?
 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
-                      avx512vl_i8_info, HasBWI, 1>,
+                      SSE_ALU_F32P, avx512vl_i8_info, HasBWI, 1>,
                 EVEX_CD8<8, CD8VF>, VEX_WIG;
 
 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
-                      avx512vl_i16_info, HasBWI, 1>,
+                      SSE_ALU_F32P, avx512vl_i16_info, HasBWI, 1>,
                 EVEX_CD8<16, CD8VF>, VEX_WIG;
 
 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
-                      avx512vl_i32_info, HasAVX512, 1>,
+                      SSE_ALU_F32P, avx512vl_i32_info, HasAVX512, 1>,
                 EVEX_CD8<32, CD8VF>;
 
 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
-                      avx512vl_i64_info, HasAVX512, 1>,
+                      SSE_ALU_F32P, avx512vl_i64_info, HasAVX512, 1>,
                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
 
 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
-                      avx512vl_i8_info, HasBWI>,
+                      SSE_ALU_F32P, avx512vl_i8_info, HasBWI>,
                 EVEX_CD8<8, CD8VF>, VEX_WIG;
 
 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
-                      avx512vl_i16_info, HasBWI>,
+                      SSE_ALU_F32P, avx512vl_i16_info, HasBWI>,
                 EVEX_CD8<16, CD8VF>, VEX_WIG;
 
 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
-                      avx512vl_i32_info, HasAVX512>,
+                      SSE_ALU_F32P, avx512vl_i32_info, HasAVX512>,
                 EVEX_CD8<32, CD8VF>;
 
 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
-                      avx512vl_i64_info, HasAVX512>,
+                      SSE_ALU_F32P, avx512vl_i64_info, HasAVX512>,
                 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
 
 // Transforms to swizzle an immediate to help matching memory operand in first

Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=319758&r1=319757&r2=319758&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Tue Dec  5 04:02:22 2017
@@ -2360,7 +2360,7 @@ define <16 x float> @sbto16f32(<16 x i32
 ; GENERIC-LABEL: sbto16f32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
+; GENERIC-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpmovm2d %k0, %zmm0
 ; GENERIC-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2830,7 +2830,7 @@ define <16 x float> @ubto16f32(<16 x i32
 ; GENERIC-LABEL: ubto16f32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
 ; GENERIC-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2851,7 +2851,7 @@ define <16 x double> @ubto16f64(<16 x i3
 ; GENERIC-LABEL: ubto16f64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    movl {{.*}}(%rip), %eax # sched: [5:0.50]
 ; GENERIC-NEXT:    vpbroadcastd %eax, %ymm0 {%k1} {z}
 ; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
@@ -2880,7 +2880,7 @@ define <8 x float> @ubto8f32(<8 x i32> %
 ; GENERIC-LABEL: ubto8f32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpgtd %ymm0, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpgtd %ymm0, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
 ; GENERIC-NEXT:    vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2901,7 +2901,7 @@ define <8 x double> @ubto8f64(<8 x i32>
 ; GENERIC-LABEL: ubto8f64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpgtd %ymm0, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpgtd %ymm0, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
 ; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2922,7 +2922,7 @@ define <4 x float> @ubto4f32(<4 x i32> %
 ; GENERIC-LABEL: ubto4f32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpgtd %xmm0, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpgtd %xmm0, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
 ; GENERIC-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2943,7 +2943,7 @@ define <4 x double> @ubto4f64(<4 x i32>
 ; GENERIC-LABEL: ubto4f64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpgtd %xmm0, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpgtd %xmm0, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
 ; GENERIC-NEXT:    vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4474,7 +4474,7 @@ define i16 @trunc_i32_to_i1(i32 %a) {
 define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
 ; GENERIC-LABEL: sext_8i1_8i16:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
+; GENERIC-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpmovm2w %k0, %xmm0
 ; GENERIC-NEXT:    vzeroupper
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4493,7 +4493,7 @@ define <8 x i16> @sext_8i1_8i16(<8 x i32
 define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
 ; GENERIC-LABEL: sext_16i1_16i32:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
+; GENERIC-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpmovm2d %k0, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4510,7 +4510,7 @@ define <16 x i32> @sext_16i1_16i32(<16 x
 define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
 ; GENERIC-LABEL: sext_8i1_8i64:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0
+; GENERIC-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpmovm2q %k0, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4694,7 +4694,7 @@ define <4 x i64> @zext_8x32_to_4x64(<8 x
 define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
 ; GENERIC-LABEL: zext_64xi1_to_64xi8:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpcmpeqb %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqb %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4711,7 +4711,7 @@ define <64 x i8> @zext_64xi1_to_64xi8(<6
 define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
 ; GENERIC-LABEL: zext_32xi1_to_32xi16:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4728,7 +4728,7 @@ define <32 x i16> @zext_32xi1_to_32xi16(
 define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 {
 ; GENERIC-LABEL: zext_16xi1_to_16xi16:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpcmpeqw %ymm1, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm1, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4746,7 +4746,7 @@ define <16 x i16> @zext_16xi1_to_16xi16(
 define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 {
 ; GENERIC-LABEL: zext_32xi1_to_32xi8:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4766,7 +4766,7 @@ define <4 x i32> @zext_4xi1_to_4x32(<4 x
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] sched: [6:0.50]
 ; GENERIC-NEXT:    vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm1, %xmm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm1, %xmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4789,7 +4789,7 @@ define <2 x i64> @zext_2xi1_to_2xi64(<2
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] sched: [6:0.50]
 ; GENERIC-NEXT:    vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %xmm1, %xmm0, %k1
+; GENERIC-NEXT:    vpcmpeqq %xmm1, %xmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7055,8 +7055,8 @@ entry:
 define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
 ; GENERIC-LABEL: test4:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0
-; GENERIC-NEXT:    vpcmpgtq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 # sched: [3:1.00]
+; GENERIC-NEXT:    vpcmpgtq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    kandnw %k0, %k1, %k0
 ; GENERIC-NEXT:    vpmovm2d %k0, %xmm0
 ; GENERIC-NEXT:    vzeroupper
@@ -7080,8 +7080,8 @@ define <4 x i32> @test4(<4 x i64> %x, <4
 define <2 x i64> @vcmp_test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
 ; GENERIC-LABEL: vcmp_test5:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0
-; GENERIC-NEXT:    vpcmpgtq %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0 # sched: [3:1.00]
+; GENERIC-NEXT:    vpcmpgtq %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    kandnw %k1, %k0, %k0
 ; GENERIC-NEXT:    vpmovm2q %k0, %xmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7155,7 +7155,7 @@ define <16 x i8> @vcmp_test8(<16 x i32>%
 ; GENERIC-NEXT:    vzeroupper
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ; GENERIC-NEXT:  .LBB386_1:
-; GENERIC-NEXT:    vpcmpgtd %zmm2, %zmm0, %k0
+; GENERIC-NEXT:    vpcmpgtd %zmm2, %zmm0, %k0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpmovm2b %k0, %xmm0
 ; GENERIC-NEXT:    vzeroupper
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8079,7 +8079,7 @@ define i32 @test_bitcast_v8i1_zext(<16 x
 ; GENERIC-LABEL: test_bitcast_v8i1_zext:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; GENERIC-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 # sched: [3:1.00]
 ; GENERIC-NEXT:    kmovb %k0, %eax
 ; GENERIC-NEXT:    addl %eax, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    vzeroupper
@@ -8105,7 +8105,7 @@ define i32 @test_bitcast_v16i1_zext(<16
 ; GENERIC-LABEL: test_bitcast_v16i1_zext:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; GENERIC-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 # sched: [3:1.00]
 ; GENERIC-NEXT:    kmovw %k0, %eax
 ; GENERIC-NEXT:    addl %eax, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    vzeroupper

Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll?rev=319758&r1=319757&r2=319758&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll Tue Dec  5 04:02:22 2017
@@ -24,7 +24,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm3 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm4, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm4, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -48,7 +48,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -69,7 +69,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm4, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm4, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -93,7 +93,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -114,7 +114,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm3 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm4, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm4, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -138,7 +138,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -174,7 +174,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm4, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm4, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -198,7 +198,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -235,7 +235,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -258,7 +258,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm1 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -281,7 +281,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -304,7 +304,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm1 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -327,7 +327,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -350,7 +350,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -389,7 +389,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -412,7 +412,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm1 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -450,7 +450,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm4, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm4, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -474,7 +474,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -495,7 +495,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm4, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm4, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -519,7 +519,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -540,7 +540,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm4, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm4, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -564,7 +564,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -600,7 +600,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm4, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm4, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -624,7 +624,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -661,7 +661,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -684,7 +684,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -707,7 +707,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -730,7 +730,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -753,7 +753,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -776,7 +776,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -815,7 +815,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -838,7 +838,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -876,7 +876,7 @@ define <8 x i32> @test_masked_8xi32_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,2,0,6,7,2,3,6] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -900,7 +900,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [4,2,0,6,7,2,3,6] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -921,7 +921,7 @@ define <8 x i32> @test_masked_8xi32_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,5,1,2,6,0,0,3] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -945,7 +945,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,5,1,2,6,0,0,3] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -966,7 +966,7 @@ define <8 x i32> @test_masked_8xi32_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm3 = [3,6,5,5,1,7,3,4] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -990,7 +990,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [3,6,5,5,1,7,3,4] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1026,7 +1026,7 @@ define <8 x i32> @test_masked_8xi32_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm3 = [3,0,3,1,0,4,5,0] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -1050,7 +1050,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [3,0,3,1,0,4,5,0] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1087,7 +1087,7 @@ define <8 x i32> @test_masked_8xi32_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [3,7,4,3,5,2,0,5] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1110,7 +1110,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,7,4,3,5,2,0,5] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1133,7 +1133,7 @@ define <8 x i32> @test_masked_8xi32_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [4,6,1,7,6,7,6,5] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1156,7 +1156,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,6,1,7,6,7,6,5] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1179,7 +1179,7 @@ define <8 x i32> @test_masked_8xi32_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [6,4,6,1,6,3,6,3] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1202,7 +1202,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm1 = [6,4,6,1,6,3,6,3] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1241,7 +1241,7 @@ define <8 x i32> @test_masked_8xi32_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [6,0,0,7,3,7,7,5] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1264,7 +1264,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm1 = [6,0,0,7,3,7,7,5] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1302,7 +1302,7 @@ define <16 x i32> @test_masked_16xi32_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa32 {{.*#+}} zmm3 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -1326,7 +1326,7 @@ define <16 x i32> @test_masked_z_16xi32_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa32 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1347,7 +1347,7 @@ define <16 x i32> @test_masked_16xi32_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa32 {{.*#+}} zmm3 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -1371,7 +1371,7 @@ define <16 x i32> @test_masked_z_16xi32_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa32 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1392,7 +1392,7 @@ define <16 x i32> @test_masked_16xi32_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa32 {{.*#+}} zmm3 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -1416,7 +1416,7 @@ define <16 x i32> @test_masked_z_16xi32_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa32 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1452,7 +1452,7 @@ define <16 x i32> @test_masked_16xi32_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa32 {{.*#+}} zmm3 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -1476,7 +1476,7 @@ define <16 x i32> @test_masked_z_16xi32_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa32 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1513,7 +1513,7 @@ define <16 x i32> @test_masked_16xi32_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa32 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1536,7 +1536,7 @@ define <16 x i32> @test_masked_z_16xi32_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa32 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1559,7 +1559,7 @@ define <16 x i32> @test_masked_16xi32_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa32 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1582,7 +1582,7 @@ define <16 x i32> @test_masked_z_16xi32_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa32 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1605,7 +1605,7 @@ define <16 x i32> @test_masked_16xi32_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa32 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1628,7 +1628,7 @@ define <16 x i32> @test_masked_z_16xi32_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa32 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1667,7 +1667,7 @@ define <16 x i32> @test_masked_16xi32_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa32 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1690,7 +1690,7 @@ define <16 x i32> @test_masked_z_16xi32_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa32 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1725,7 +1725,7 @@ define <4 x i64> @test_masked_4xi64_perm
 ; GENERIC-LABEL: test_masked_4xi64_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,0,3,1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -1747,7 +1747,7 @@ define <4 x i64> @test_masked_z_4xi64_pe
 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,0,3,1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1766,7 +1766,7 @@ define <4 x i64> @test_masked_4xi64_perm
 ; GENERIC-LABEL: test_masked_4xi64_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -1788,7 +1788,7 @@ define <4 x i64> @test_masked_z_4xi64_pe
 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1807,7 +1807,7 @@ define <4 x i64> @test_masked_4xi64_perm
 ; GENERIC-LABEL: test_masked_4xi64_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,2,2,1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -1829,7 +1829,7 @@ define <4 x i64> @test_masked_z_4xi64_pe
 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,2,1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1861,7 +1861,7 @@ define <4 x i64> @test_masked_4xi64_perm
 ; GENERIC-LABEL: test_masked_4xi64_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -1883,7 +1883,7 @@ define <4 x i64> @test_masked_z_4xi64_pe
 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1916,7 +1916,7 @@ define <4 x i64> @test_masked_4xi64_perm
 ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1937,7 +1937,7 @@ define <4 x i64> @test_masked_z_4xi64_pe
 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm1, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm1, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1958,7 +1958,7 @@ define <4 x i64> @test_masked_4xi64_perm
 ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1979,7 +1979,7 @@ define <4 x i64> @test_masked_z_4xi64_pe
 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm1, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm1, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2000,7 +2000,7 @@ define <4 x i64> @test_masked_4xi64_perm
 ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2021,7 +2021,7 @@ define <4 x i64> @test_masked_z_4xi64_pe
 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm1, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm1, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2056,7 +2056,7 @@ define <4 x i64> @test_masked_4xi64_perm
 ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2077,7 +2077,7 @@ define <4 x i64> @test_masked_z_4xi64_pe
 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm1, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm1, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,0,1,3] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2114,7 +2114,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,4,7,6,5,5,1,6] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2138,7 +2138,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,4,7,6,5,5,1,6] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2158,7 +2158,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm1 {%k1} = zmm0[1,0,1,1,5,4,5,5] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2180,7 +2180,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,1,1,5,4,5,5] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2200,7 +2200,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [1,3,7,3,3,5,4,1] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2224,7 +2224,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,3,7,3,3,5,4,1] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2257,7 +2257,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,1,7,5,7,5] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2279,7 +2279,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,1,7,5,7,5] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2299,7 +2299,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [6,3,1,1,7,4,0,3] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2323,7 +2323,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [6,3,1,1,7,4,0,3] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2343,7 +2343,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm1 {%k1} = zmm0[0,0,0,0,4,4,4,4] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2365,7 +2365,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2400,7 +2400,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [5,1,4,4,5,4,2,7] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2424,7 +2424,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [5,1,4,4,5,4,2,7] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2444,7 +2444,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,3,3,3,7,7,7,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2466,7 +2466,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,3,3,7,7,7,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2502,7 +2502,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [5,1,6,5,7,3,7,3] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2525,7 +2525,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [5,1,6,5,7,3,7,3] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2547,7 +2547,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2568,7 +2568,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2590,7 +2590,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,2,1,4,1,1,5,5] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2613,7 +2613,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,2,1,4,1,1,5,5] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2649,7 +2649,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2670,7 +2670,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2692,7 +2692,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [5,0,7,0,3,5,0,6] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2715,7 +2715,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [5,0,7,0,3,5,0,6] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2737,7 +2737,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2758,7 +2758,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2796,7 +2796,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,6,3,7,3,0,3,6] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2819,7 +2819,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,6,3,7,3,0,3,6] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2841,7 +2841,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2862,7 +2862,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2899,7 +2899,7 @@ define <8 x float> @test_masked_8xfloat_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm3 = [3,4,2,4,1,2,3,4] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2923,7 +2923,7 @@ define <8 x float> @test_masked_z_8xfloa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm2 = [3,4,2,4,1,2,3,4] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2944,7 +2944,7 @@ define <8 x float> @test_masked_8xfloat_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm3 = [4,2,1,0,6,0,5,1] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2968,7 +2968,7 @@ define <8 x float> @test_masked_z_8xfloa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm2 = [4,2,1,0,6,0,5,1] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2989,7 +2989,7 @@ define <8 x float> @test_masked_8xfloat_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm3 = [2,5,5,5,4,6,0,5] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -3013,7 +3013,7 @@ define <8 x float> @test_masked_z_8xfloa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm2 = [2,5,5,5,4,6,0,5] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3049,7 +3049,7 @@ define <8 x float> @test_masked_8xfloat_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm3 = [0,5,2,5,5,5,1,6] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -3073,7 +3073,7 @@ define <8 x float> @test_masked_z_8xfloa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm2 = [0,5,2,5,5,5,1,6] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3110,7 +3110,7 @@ define <8 x float> @test_masked_8xfloat_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm2 = [5,2,1,6,4,2,4,0] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3133,7 +3133,7 @@ define <8 x float> @test_masked_z_8xfloa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm1 = [5,2,1,6,4,2,4,0] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3156,7 +3156,7 @@ define <8 x float> @test_masked_8xfloat_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm2 = [1,3,7,4,0,6,6,6] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3179,7 +3179,7 @@ define <8 x float> @test_masked_z_8xfloa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm1 = [1,3,7,4,0,6,6,6] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3202,7 +3202,7 @@ define <8 x float> @test_masked_8xfloat_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm2 = [4,5,1,5,6,6,2,4] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3225,7 +3225,7 @@ define <8 x float> @test_masked_z_8xfloa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm1 = [4,5,1,5,6,6,2,4] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3264,7 +3264,7 @@ define <8 x float> @test_masked_8xfloat_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm2 = [5,7,0,6,4,2,3,0] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3287,7 +3287,7 @@ define <8 x float> @test_masked_z_8xfloa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm1 = [5,7,0,6,4,2,3,0] sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3325,7 +3325,7 @@ define <16 x float> @test_masked_16xfloa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm3 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -3349,7 +3349,7 @@ define <16 x float> @test_masked_z_16xfl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm2 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3370,7 +3370,7 @@ define <16 x float> @test_masked_16xfloa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm3 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -3394,7 +3394,7 @@ define <16 x float> @test_masked_z_16xfl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm2 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3415,7 +3415,7 @@ define <16 x float> @test_masked_16xfloa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm3 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -3439,7 +3439,7 @@ define <16 x float> @test_masked_z_16xfl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm2 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3475,7 +3475,7 @@ define <16 x float> @test_masked_16xfloa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm3 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -3499,7 +3499,7 @@ define <16 x float> @test_masked_z_16xfl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm2 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3536,7 +3536,7 @@ define <16 x float> @test_masked_16xfloa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm2 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3559,7 +3559,7 @@ define <16 x float> @test_masked_z_16xfl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm1 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3582,7 +3582,7 @@ define <16 x float> @test_masked_16xfloa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm2 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3605,7 +3605,7 @@ define <16 x float> @test_masked_z_16xfl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm1 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3628,7 +3628,7 @@ define <16 x float> @test_masked_16xfloa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm2 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3651,7 +3651,7 @@ define <16 x float> @test_masked_z_16xfl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm1 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3690,7 +3690,7 @@ define <16 x float> @test_masked_16xfloa
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm2 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3713,7 +3713,7 @@ define <16 x float> @test_masked_z_16xfl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm1 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3748,7 +3748,7 @@ define <4 x double> @test_masked_4xdoubl
 ; GENERIC-LABEL: test_masked_4xdouble_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,2] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -3770,7 +3770,7 @@ define <4 x double> @test_masked_z_4xdou
 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,2] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3789,7 +3789,7 @@ define <4 x double> @test_masked_4xdoubl
 ; GENERIC-LABEL: test_masked_4xdouble_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,0,0,0] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -3811,7 +3811,7 @@ define <4 x double> @test_masked_z_4xdou
 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,0,0] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3830,7 +3830,7 @@ define <4 x double> @test_masked_4xdoubl
 ; GENERIC-LABEL: test_masked_4xdouble_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -3852,7 +3852,7 @@ define <4 x double> @test_masked_z_4xdou
 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3884,7 +3884,7 @@ define <4 x double> @test_masked_4xdoubl
 ; GENERIC-LABEL: test_masked_4xdouble_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,2] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -3906,7 +3906,7 @@ define <4 x double> @test_masked_z_4xdou
 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,2] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3939,7 +3939,7 @@ define <4 x double> @test_masked_4xdoubl
 ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3960,7 +3960,7 @@ define <4 x double> @test_masked_z_4xdou
 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm1, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm1, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3981,7 +3981,7 @@ define <4 x double> @test_masked_4xdoubl
 ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4002,7 +4002,7 @@ define <4 x double> @test_masked_z_4xdou
 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm1, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm1, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4023,7 +4023,7 @@ define <4 x double> @test_masked_4xdoubl
 ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4044,7 +4044,7 @@ define <4 x double> @test_masked_z_4xdou
 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm1, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm1, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4079,7 +4079,7 @@ define <4 x double> @test_masked_4xdoubl
 ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4100,7 +4100,7 @@ define <4 x double> @test_masked_z_4xdou
 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm1, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm1, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4137,7 +4137,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm3 = [5,7,4,2,7,4,3,4] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4161,7 +4161,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm2 = [5,7,4,2,7,4,3,4] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4181,7 +4181,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,2,7,4,4,6] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4203,7 +4203,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,2,7,4,4,6] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4223,7 +4223,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm3 = [7,5,5,5,3,5,1,7] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4247,7 +4247,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm2 = [7,5,5,5,3,5,1,7] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4280,7 +4280,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4302,7 +4302,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4322,7 +4322,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm3 = [3,5,3,4,6,5,7,1] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4346,7 +4346,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm2 = [3,5,3,4,6,5,7,1] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4366,7 +4366,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,3,2,3,7,7,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4388,7 +4388,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,2,3,7,7,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4423,7 +4423,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm3 = [2,7,6,4,0,0,0,2] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4447,7 +4447,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm2 = [2,7,6,4,0,0,0,2] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4467,7 +4467,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,2,7,5,7,6] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4489,7 +4489,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,2,7,5,7,6] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4525,7 +4525,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm2 = [0,3,4,0,4,2,0,1] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4548,7 +4548,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm1 = [0,3,4,0,4,2,0,1] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4570,7 +4570,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4591,7 +4591,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4613,7 +4613,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm2 = [6,7,2,7,7,6,2,5] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4636,7 +4636,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm1 = [6,7,2,7,7,6,2,5] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4672,7 +4672,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4693,7 +4693,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4715,7 +4715,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm2 = [1,1,3,5,6,0,6,0] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4738,7 +4738,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm1 = [1,1,3,5,6,0,6,0] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4760,7 +4760,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4781,7 +4781,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4819,7 +4819,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm2 = [2,4,0,4,6,1,2,5] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4842,7 +4842,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm1 = [2,4,0,4,6,1,2,5] sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4864,7 +4864,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4885,7 +4885,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4919,7 +4919,7 @@ define <16 x i8> @test_masked_16xi8_perm
 ; GENERIC-LABEL: test_masked_16xi8_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqb %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4941,7 +4941,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
 ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqb %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4960,7 +4960,7 @@ define <16 x i8> @test_masked_16xi8_perm
 ; GENERIC-LABEL: test_masked_16xi8_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqb %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4982,7 +4982,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
 ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqb %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5001,7 +5001,7 @@ define <16 x i8> @test_masked_16xi8_perm
 ; GENERIC-LABEL: test_masked_16xi8_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqb %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5023,7 +5023,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
 ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqb %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5055,7 +5055,7 @@ define <16 x i8> @test_masked_16xi8_perm
 ; GENERIC-LABEL: test_masked_16xi8_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqb %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5077,7 +5077,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
 ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqb %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5113,7 +5113,7 @@ define <16 x i8> @test_masked_16xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %xmm2 # sched: [6:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %xmm3, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqb %xmm3, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5136,7 +5136,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %xmm1 # sched: [6:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %xmm2, %xmm0, %k1
+; GENERIC-NEXT:    vpcmpeqb %xmm2, %xmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5159,7 +5159,7 @@ define <16 x i8> @test_masked_16xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %xmm2 # sched: [6:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %xmm3, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqb %xmm3, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5182,7 +5182,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %xmm1 # sched: [6:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %xmm2, %xmm0, %k1
+; GENERIC-NEXT:    vpcmpeqb %xmm2, %xmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5205,7 +5205,7 @@ define <16 x i8> @test_masked_16xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %xmm2 # sched: [6:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %xmm3, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqb %xmm3, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} = xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5228,7 +5228,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %xmm1 # sched: [6:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %xmm2, %xmm0, %k1
+; GENERIC-NEXT:    vpcmpeqb %xmm2, %xmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5267,7 +5267,7 @@ define <16 x i8> @test_masked_16xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %xmm2 # sched: [6:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %xmm3, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqb %xmm3, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5290,7 +5290,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %xmm1 # sched: [6:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %xmm2, %xmm0, %k1
+; GENERIC-NEXT:    vpcmpeqb %xmm2, %xmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5325,7 +5325,7 @@ define <32 x i8> @test_masked_32xi8_perm
 ; GENERIC-LABEL: test_masked_32xi8_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqb %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5347,7 +5347,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
 ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqb %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5366,7 +5366,7 @@ define <32 x i8> @test_masked_32xi8_perm
 ; GENERIC-LABEL: test_masked_32xi8_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqb %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5388,7 +5388,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
 ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqb %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5407,7 +5407,7 @@ define <32 x i8> @test_masked_32xi8_perm
 ; GENERIC-LABEL: test_masked_32xi8_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqb %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5429,7 +5429,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
 ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqb %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5461,7 +5461,7 @@ define <32 x i8> @test_masked_32xi8_perm
 ; GENERIC-LABEL: test_masked_32xi8_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqb %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5483,7 +5483,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
 ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqb %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5519,7 +5519,7 @@ define <32 x i8> @test_masked_32xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm2 # sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqb %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5542,7 +5542,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm1 # sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %ymm2, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqb %ymm2, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5565,7 +5565,7 @@ define <32 x i8> @test_masked_32xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm2 # sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqb %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5588,7 +5588,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm1 # sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %ymm2, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqb %ymm2, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5611,7 +5611,7 @@ define <32 x i8> @test_masked_32xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm2 # sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqb %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5634,7 +5634,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm1 # sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %ymm2, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqb %ymm2, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5673,7 +5673,7 @@ define <32 x i8> @test_masked_32xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm2 # sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %ymm3, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqb %ymm3, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5696,7 +5696,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm1 # sched: [7:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %ymm2, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqb %ymm2, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5731,7 +5731,7 @@ define <64 x i8> @test_masked_64xi8_perm
 ; GENERIC-LABEL: test_masked_64xi8_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqb %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5753,7 +5753,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
 ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqb %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5772,7 +5772,7 @@ define <64 x i8> @test_masked_64xi8_perm
 ; GENERIC-LABEL: test_masked_64xi8_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqb %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5794,7 +5794,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
 ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqb %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5813,7 +5813,7 @@ define <64 x i8> @test_masked_64xi8_perm
 ; GENERIC-LABEL: test_masked_64xi8_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqb %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5835,7 +5835,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
 ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqb %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5867,7 +5867,7 @@ define <64 x i8> @test_masked_64xi8_perm
 ; GENERIC-LABEL: test_masked_64xi8_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqb %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5889,7 +5889,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
 ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqb %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5925,7 +5925,7 @@ define <64 x i8> @test_masked_64xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm2 # sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqb %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5948,7 +5948,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm1 # sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %zmm2, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqb %zmm2, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5971,7 +5971,7 @@ define <64 x i8> @test_masked_64xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm2 # sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqb %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5994,7 +5994,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm1 # sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %zmm2, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqb %zmm2, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6017,7 +6017,7 @@ define <64 x i8> @test_masked_64xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm2 # sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqb %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6040,7 +6040,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm1 # sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %zmm2, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqb %zmm2, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6079,7 +6079,7 @@ define <64 x i8> @test_masked_64xi8_perm
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm2 # sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqb %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6102,7 +6102,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm1 # sched: [4:0.50]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqb %zmm2, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqb %zmm2, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6137,7 +6137,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -6159,7 +6159,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6178,7 +6178,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -6200,7 +6200,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6219,7 +6219,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -6241,7 +6241,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,4,4,5] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6273,7 +6273,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -6295,7 +6295,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6314,7 +6314,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -6336,7 +6336,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6355,7 +6355,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -6377,7 +6377,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6409,7 +6409,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -6431,7 +6431,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6450,7 +6450,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -6472,7 +6472,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6505,7 +6505,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6526,7 +6526,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm1, %xmm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm1, %xmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6547,7 +6547,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6568,7 +6568,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm1, %xmm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm1, %xmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6589,7 +6589,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6610,7 +6610,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm1, %xmm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm1, %xmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6645,7 +6645,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6666,7 +6666,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm1, %xmm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm1, %xmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6687,7 +6687,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6708,7 +6708,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm1, %xmm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm1, %xmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6729,7 +6729,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6750,7 +6750,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm1, %xmm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm1, %xmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6785,7 +6785,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6806,7 +6806,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm1, %xmm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm1, %xmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6827,7 +6827,7 @@ define <8 x i16> @test_masked_8xi16_perm
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6848,7 +6848,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %xmm1, %xmm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %xmm1, %xmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6882,7 +6882,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -6904,7 +6904,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6923,7 +6923,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -6945,7 +6945,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6964,7 +6964,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -6986,7 +6986,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7018,7 +7018,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7040,7 +7040,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7059,7 +7059,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7081,7 +7081,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7100,7 +7100,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7122,7 +7122,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7154,7 +7154,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7176,7 +7176,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7195,7 +7195,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7217,7 +7217,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7250,7 +7250,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7271,7 +7271,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm1, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm1, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7292,7 +7292,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7313,7 +7313,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm1, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm1, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7334,7 +7334,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7355,7 +7355,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm1, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm1, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7390,7 +7390,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7411,7 +7411,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm1, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm1, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7432,7 +7432,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7453,7 +7453,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm1, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm1, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7474,7 +7474,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7495,7 +7495,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm1, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm1, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7530,7 +7530,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7551,7 +7551,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm1, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm1, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7572,7 +7572,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7593,7 +7593,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %ymm1, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %ymm1, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7627,7 +7627,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7649,7 +7649,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7668,7 +7668,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7690,7 +7690,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7709,7 +7709,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7731,7 +7731,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7763,7 +7763,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7785,7 +7785,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7804,7 +7804,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7826,7 +7826,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7845,7 +7845,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7867,7 +7867,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7899,7 +7899,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7921,7 +7921,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7940,7 +7940,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7962,7 +7962,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7995,7 +7995,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8016,7 +8016,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8037,7 +8037,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8058,7 +8058,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8079,7 +8079,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8100,7 +8100,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8135,7 +8135,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8156,7 +8156,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8177,7 +8177,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8198,7 +8198,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8220,7 +8220,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm2 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [5:1.00]
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vmovdqu16 %zmm2, %zmm0 {%k1}
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8243,7 +8243,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm1 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [5:1.00]
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vmovdqu16 %zmm1, %zmm0 {%k1} {z}
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8279,7 +8279,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8300,7 +8300,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8321,7 +8321,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8342,7 +8342,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask7:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8376,7 +8376,7 @@ define <4 x i32> @test_masked_4xi32_perm
 ; GENERIC-LABEL: test_masked_4xi32_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8398,7 +8398,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8417,7 +8417,7 @@ define <4 x i32> @test_masked_4xi32_perm
 ; GENERIC-LABEL: test_masked_4xi32_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8439,7 +8439,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8458,7 +8458,7 @@ define <4 x i32> @test_masked_4xi32_perm
 ; GENERIC-LABEL: test_masked_4xi32_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8480,7 +8480,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8512,7 +8512,7 @@ define <4 x i32> @test_masked_4xi32_perm
 ; GENERIC-LABEL: test_masked_4xi32_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8534,7 +8534,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8567,7 +8567,7 @@ define <4 x i32> @test_masked_4xi32_perm
 ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8588,7 +8588,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm1, %xmm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm1, %xmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8609,7 +8609,7 @@ define <4 x i32> @test_masked_4xi32_perm
 ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8630,7 +8630,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm1, %xmm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm1, %xmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8651,7 +8651,7 @@ define <4 x i32> @test_masked_4xi32_perm
 ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8672,7 +8672,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm1, %xmm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm1, %xmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8707,7 +8707,7 @@ define <4 x i32> @test_masked_4xi32_perm
 ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8728,7 +8728,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm1, %xmm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm1, %xmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8762,7 +8762,7 @@ define <8 x i32> @test2_masked_8xi32_per
 ; GENERIC-LABEL: test2_masked_8xi32_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm1 {%k1} = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8784,7 +8784,7 @@ define <8 x i32> @test2_masked_z_8xi32_p
 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8803,7 +8803,7 @@ define <8 x i32> @test2_masked_8xi32_per
 ; GENERIC-LABEL: test2_masked_8xi32_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8825,7 +8825,7 @@ define <8 x i32> @test2_masked_z_8xi32_p
 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8844,7 +8844,7 @@ define <8 x i32> @test2_masked_8xi32_per
 ; GENERIC-LABEL: test2_masked_8xi32_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8866,7 +8866,7 @@ define <8 x i32> @test2_masked_z_8xi32_p
 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8898,7 +8898,7 @@ define <8 x i32> @test2_masked_8xi32_per
 ; GENERIC-LABEL: test2_masked_8xi32_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8920,7 +8920,7 @@ define <8 x i32> @test2_masked_z_8xi32_p
 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8953,7 +8953,7 @@ define <8 x i32> @test2_masked_8xi32_per
 ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} = mem[1,0,2,0,5,4,6,4] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8974,7 +8974,7 @@ define <8 x i32> @test2_masked_z_8xi32_p
 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm1, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm1, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,2,0,5,4,6,4] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8995,7 +8995,7 @@ define <8 x i32> @test2_masked_8xi32_per
 ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9016,7 +9016,7 @@ define <8 x i32> @test2_masked_z_8xi32_p
 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm1, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm1, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9037,7 +9037,7 @@ define <8 x i32> @test2_masked_8xi32_per
 ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,3,1,7,6,7,5] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9058,7 +9058,7 @@ define <8 x i32> @test2_masked_z_8xi32_p
 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm1, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm1, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,1,7,6,7,5] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9093,7 +9093,7 @@ define <8 x i32> @test2_masked_8xi32_per
 ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,0,0,7,6,4,4] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9114,7 +9114,7 @@ define <8 x i32> @test2_masked_z_8xi32_p
 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm1, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm1, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,0,0,7,6,4,4] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9148,7 +9148,7 @@ define <16 x i32> @test2_masked_16xi32_p
 ; GENERIC-LABEL: test2_masked_16xi32_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9170,7 +9170,7 @@ define <16 x i32> @test2_masked_z_16xi32
 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9189,7 +9189,7 @@ define <16 x i32> @test2_masked_16xi32_p
 ; GENERIC-LABEL: test2_masked_16xi32_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9211,7 +9211,7 @@ define <16 x i32> @test2_masked_z_16xi32
 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9230,7 +9230,7 @@ define <16 x i32> @test2_masked_16xi32_p
 ; GENERIC-LABEL: test2_masked_16xi32_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9252,7 +9252,7 @@ define <16 x i32> @test2_masked_z_16xi32
 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9284,7 +9284,7 @@ define <16 x i32> @test2_masked_16xi32_p
 ; GENERIC-LABEL: test2_masked_16xi32_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9306,7 +9306,7 @@ define <16 x i32> @test2_masked_z_16xi32
 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9339,7 +9339,7 @@ define <16 x i32> @test2_masked_16xi32_p
 ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9360,7 +9360,7 @@ define <16 x i32> @test2_masked_z_16xi32
 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9381,7 +9381,7 @@ define <16 x i32> @test2_masked_16xi32_p
 ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9402,7 +9402,7 @@ define <16 x i32> @test2_masked_z_16xi32
 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9423,7 +9423,7 @@ define <16 x i32> @test2_masked_16xi32_p
 ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9444,7 +9444,7 @@ define <16 x i32> @test2_masked_z_16xi32
 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9479,7 +9479,7 @@ define <16 x i32> @test2_masked_16xi32_p
 ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9500,7 +9500,7 @@ define <16 x i32> @test2_masked_z_16xi32
 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9534,7 +9534,7 @@ define <8 x float> @test2_8xfloat_masked
 ; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9556,7 +9556,7 @@ define <8 x float> @test2_8xfloat_zero_m
 ; GENERIC-LABEL: test2_8xfloat_zero_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9575,7 +9575,7 @@ define <8 x float> @test2_8xfloat_masked
 ; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9597,7 +9597,7 @@ define <8 x float> @test2_8xfloat_zero_m
 ; GENERIC-LABEL: test2_8xfloat_zero_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9616,7 +9616,7 @@ define <8 x float> @test2_8xfloat_masked
 ; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9638,7 +9638,7 @@ define <8 x float> @test2_8xfloat_zero_m
 ; GENERIC-LABEL: test2_8xfloat_zero_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9670,7 +9670,7 @@ define <8 x float> @test2_8xfloat_masked
 ; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9692,7 +9692,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9725,7 +9725,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9748,7 +9748,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9769,7 +9769,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9792,7 +9792,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9813,7 +9813,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9836,7 +9836,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9871,7 +9871,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9894,7 +9894,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9928,7 +9928,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9950,7 +9950,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9969,7 +9969,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9991,7 +9991,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10010,7 +10010,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10032,7 +10032,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10064,7 +10064,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10086,7 +10086,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10119,7 +10119,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10142,7 +10142,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10163,7 +10163,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10186,7 +10186,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10207,7 +10207,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10230,7 +10230,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10265,7 +10265,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10288,7 +10288,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10322,7 +10322,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10344,7 +10344,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10363,7 +10363,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10385,7 +10385,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10404,7 +10404,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10426,7 +10426,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10458,7 +10458,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10480,7 +10480,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10513,7 +10513,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10536,7 +10536,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10557,7 +10557,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10580,7 +10580,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10601,7 +10601,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10624,7 +10624,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10659,7 +10659,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10682,7 +10682,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10716,7 +10716,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10738,7 +10738,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10757,7 +10757,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10779,7 +10779,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10798,7 +10798,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10820,7 +10820,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10852,7 +10852,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10874,7 +10874,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10907,7 +10907,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10930,7 +10930,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10951,7 +10951,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10974,7 +10974,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10995,7 +10995,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11018,7 +11018,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11053,7 +11053,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11076,7 +11076,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11110,7 +11110,7 @@ define <8 x i32> @test_8xi32_masked_shuf
 ; GENERIC-LABEL: test_8xi32_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11132,7 +11132,7 @@ define <8 x i32> @test_8xi32_zero_masked
 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11151,7 +11151,7 @@ define <8 x i32> @test_8xi32_masked_shuf
 ; GENERIC-LABEL: test_8xi32_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11173,7 +11173,7 @@ define <8 x i32> @test_8xi32_zero_masked
 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11192,7 +11192,7 @@ define <8 x i32> @test_8xi32_masked_shuf
 ; GENERIC-LABEL: test_8xi32_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11214,7 +11214,7 @@ define <8 x i32> @test_8xi32_zero_masked
 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11246,7 +11246,7 @@ define <8 x i32> @test_8xi32_masked_shuf
 ; GENERIC-LABEL: test_8xi32_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11268,7 +11268,7 @@ define <8 x i32> @test_8xi32_zero_masked
 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11301,7 +11301,7 @@ define <8 x i32> @test_8xi32_masked_shuf
 ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11324,7 +11324,7 @@ define <8 x i32> @test_8xi32_zero_masked
 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11345,7 +11345,7 @@ define <8 x i32> @test_8xi32_masked_shuf
 ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11368,7 +11368,7 @@ define <8 x i32> @test_8xi32_zero_masked
 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11389,7 +11389,7 @@ define <8 x i32> @test_8xi32_masked_shuf
 ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11412,7 +11412,7 @@ define <8 x i32> @test_8xi32_zero_masked
 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11447,7 +11447,7 @@ define <8 x i32> @test_8xi32_masked_shuf
 ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11470,7 +11470,7 @@ define <8 x i32> @test_8xi32_zero_masked
 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11504,7 +11504,7 @@ define <16 x i32> @test_16xi32_masked_sh
 ; GENERIC-LABEL: test_16xi32_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11526,7 +11526,7 @@ define <16 x i32> @test_16xi32_zero_mask
 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11545,7 +11545,7 @@ define <16 x i32> @test_16xi32_masked_sh
 ; GENERIC-LABEL: test_16xi32_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11567,7 +11567,7 @@ define <16 x i32> @test_16xi32_zero_mask
 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11586,7 +11586,7 @@ define <16 x i32> @test_16xi32_masked_sh
 ; GENERIC-LABEL: test_16xi32_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11608,7 +11608,7 @@ define <16 x i32> @test_16xi32_zero_mask
 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11640,7 +11640,7 @@ define <16 x i32> @test_16xi32_masked_sh
 ; GENERIC-LABEL: test_16xi32_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11662,7 +11662,7 @@ define <16 x i32> @test_16xi32_zero_mask
 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11695,7 +11695,7 @@ define <16 x i32> @test_16xi32_masked_sh
 ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11718,7 +11718,7 @@ define <16 x i32> @test_16xi32_zero_mask
 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11739,7 +11739,7 @@ define <16 x i32> @test_16xi32_masked_sh
 ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11762,7 +11762,7 @@ define <16 x i32> @test_16xi32_zero_mask
 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11783,7 +11783,7 @@ define <16 x i32> @test_16xi32_masked_sh
 ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11806,7 +11806,7 @@ define <16 x i32> @test_16xi32_zero_mask
 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11841,7 +11841,7 @@ define <16 x i32> @test_16xi32_masked_sh
 ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11864,7 +11864,7 @@ define <16 x i32> @test_16xi32_zero_mask
 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11898,7 +11898,7 @@ define <4 x i64> @test_4xi64_masked_shuf
 ; GENERIC-LABEL: test_4xi64_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11920,7 +11920,7 @@ define <4 x i64> @test_4xi64_zero_masked
 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11939,7 +11939,7 @@ define <4 x i64> @test_4xi64_masked_shuf
 ; GENERIC-LABEL: test_4xi64_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11961,7 +11961,7 @@ define <4 x i64> @test_4xi64_zero_masked
 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11980,7 +11980,7 @@ define <4 x i64> @test_4xi64_masked_shuf
 ; GENERIC-LABEL: test_4xi64_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12002,7 +12002,7 @@ define <4 x i64> @test_4xi64_zero_masked
 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12034,7 +12034,7 @@ define <4 x i64> @test_4xi64_masked_shuf
 ; GENERIC-LABEL: test_4xi64_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12056,7 +12056,7 @@ define <4 x i64> @test_4xi64_zero_masked
 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12089,7 +12089,7 @@ define <4 x i64> @test_4xi64_masked_shuf
 ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12112,7 +12112,7 @@ define <4 x i64> @test_4xi64_zero_masked
 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12133,7 +12133,7 @@ define <4 x i64> @test_4xi64_masked_shuf
 ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12156,7 +12156,7 @@ define <4 x i64> @test_4xi64_zero_masked
 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12177,7 +12177,7 @@ define <4 x i64> @test_4xi64_masked_shuf
 ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12200,7 +12200,7 @@ define <4 x i64> @test_4xi64_zero_masked
 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12235,7 +12235,7 @@ define <4 x i64> @test_4xi64_masked_shuf
 ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12258,7 +12258,7 @@ define <4 x i64> @test_4xi64_zero_masked
 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12292,7 +12292,7 @@ define <8 x i64> @test_8xi64_masked_shuf
 ; GENERIC-LABEL: test_8xi64_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12314,7 +12314,7 @@ define <8 x i64> @test_8xi64_zero_masked
 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12333,7 +12333,7 @@ define <8 x i64> @test_8xi64_masked_shuf
 ; GENERIC-LABEL: test_8xi64_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12355,7 +12355,7 @@ define <8 x i64> @test_8xi64_zero_masked
 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12374,7 +12374,7 @@ define <8 x i64> @test_8xi64_masked_shuf
 ; GENERIC-LABEL: test_8xi64_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12396,7 +12396,7 @@ define <8 x i64> @test_8xi64_zero_masked
 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12428,7 +12428,7 @@ define <8 x i64> @test_8xi64_masked_shuf
 ; GENERIC-LABEL: test_8xi64_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12450,7 +12450,7 @@ define <8 x i64> @test_8xi64_zero_masked
 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12483,7 +12483,7 @@ define <8 x i64> @test_8xi64_masked_shuf
 ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12506,7 +12506,7 @@ define <8 x i64> @test_8xi64_zero_masked
 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12527,7 +12527,7 @@ define <8 x i64> @test_8xi64_masked_shuf
 ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12550,7 +12550,7 @@ define <8 x i64> @test_8xi64_zero_masked
 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12571,7 +12571,7 @@ define <8 x i64> @test_8xi64_masked_shuf
 ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12594,7 +12594,7 @@ define <8 x i64> @test_8xi64_zero_masked
 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12629,7 +12629,7 @@ define <8 x i64> @test_8xi64_masked_shuf
 ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12652,7 +12652,7 @@ define <8 x i64> @test_8xi64_zero_masked
 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12686,7 +12686,7 @@ define <4 x float> @test_4xfloat_masked_
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm4, %xmm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm4, %xmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12708,7 +12708,7 @@ define <4 x float> @test_4xfloat_zero_ma
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12727,7 +12727,7 @@ define <4 x float> @test_4xfloat_masked_
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm4, %xmm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm4, %xmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12749,7 +12749,7 @@ define <4 x float> @test_4xfloat_zero_ma
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12768,7 +12768,7 @@ define <4 x float> @test_4xfloat_masked_
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm4, %xmm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm4, %xmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12790,7 +12790,7 @@ define <4 x float> @test_4xfloat_zero_ma
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12822,7 +12822,7 @@ define <4 x float> @test_4xfloat_masked_
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm4, %xmm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm4, %xmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12844,7 +12844,7 @@ define <4 x float> @test_4xfloat_zero_ma
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12877,7 +12877,7 @@ define <4 x float> @test_4xfloat_masked_
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12900,7 +12900,7 @@ define <4 x float> @test_4xfloat_zero_ma
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12921,7 +12921,7 @@ define <4 x float> @test_4xfloat_masked_
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12944,7 +12944,7 @@ define <4 x float> @test_4xfloat_zero_ma
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12965,7 +12965,7 @@ define <4 x float> @test_4xfloat_masked_
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12988,7 +12988,7 @@ define <4 x float> @test_4xfloat_zero_ma
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13023,7 +13023,7 @@ define <4 x float> @test_4xfloat_masked_
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13046,7 +13046,7 @@ define <4 x float> @test_4xfloat_zero_ma
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13080,7 +13080,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13102,7 +13102,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13121,7 +13121,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13143,7 +13143,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13162,7 +13162,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13184,7 +13184,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13216,7 +13216,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13238,7 +13238,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13271,7 +13271,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13294,7 +13294,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13315,7 +13315,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13338,7 +13338,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13359,7 +13359,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13382,7 +13382,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13417,7 +13417,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13440,7 +13440,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13474,7 +13474,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13496,7 +13496,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13515,7 +13515,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13537,7 +13537,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13556,7 +13556,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13578,7 +13578,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13610,7 +13610,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13632,7 +13632,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13665,7 +13665,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13688,7 +13688,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13709,7 +13709,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13732,7 +13732,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13753,7 +13753,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13776,7 +13776,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13811,7 +13811,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13834,7 +13834,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13868,7 +13868,7 @@ define <2 x double> @test_2xdouble_maske
 ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %xmm4, %xmm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %xmm4, %xmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovapd %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13890,7 +13890,7 @@ define <2 x double> @test_2xdouble_zero_
 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13909,7 +13909,7 @@ define <2 x double> @test_2xdouble_maske
 ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %xmm4, %xmm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %xmm4, %xmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovapd %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13931,7 +13931,7 @@ define <2 x double> @test_2xdouble_zero_
 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13964,7 +13964,7 @@ define <2 x double> @test_2xdouble_maske
 ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13987,7 +13987,7 @@ define <2 x double> @test_2xdouble_zero_
 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14008,7 +14008,7 @@ define <2 x double> @test_2xdouble_maske
 ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14031,7 +14031,7 @@ define <2 x double> @test_2xdouble_zero_
 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14065,7 +14065,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14087,7 +14087,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14106,7 +14106,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14128,7 +14128,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14147,7 +14147,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14169,7 +14169,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14201,7 +14201,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14223,7 +14223,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14256,7 +14256,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14279,7 +14279,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14300,7 +14300,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14323,7 +14323,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14344,7 +14344,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14367,7 +14367,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14402,7 +14402,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14425,7 +14425,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14459,7 +14459,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14481,7 +14481,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14500,7 +14500,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14522,7 +14522,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14541,7 +14541,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14563,7 +14563,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14595,7 +14595,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14617,7 +14617,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14650,7 +14650,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14673,7 +14673,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14694,7 +14694,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14717,7 +14717,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14738,7 +14738,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14761,7 +14761,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14796,7 +14796,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14819,7 +14819,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14853,7 +14853,7 @@ define <4 x float> @test_4xfloat_masked_
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm4, %xmm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm4, %xmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14875,7 +14875,7 @@ define <4 x float> @test_4xfloat_zero_ma
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14894,7 +14894,7 @@ define <4 x float> @test_4xfloat_masked_
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm4, %xmm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm4, %xmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14916,7 +14916,7 @@ define <4 x float> @test_4xfloat_zero_ma
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14935,7 +14935,7 @@ define <4 x float> @test_4xfloat_masked_
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm4, %xmm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm4, %xmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14957,7 +14957,7 @@ define <4 x float> @test_4xfloat_zero_ma
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14989,7 +14989,7 @@ define <4 x float> @test_4xfloat_masked_
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm4, %xmm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm4, %xmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15011,7 +15011,7 @@ define <4 x float> @test_4xfloat_zero_ma
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15044,7 +15044,7 @@ define <4 x float> @test_4xfloat_masked_
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15067,7 +15067,7 @@ define <4 x float> @test_4xfloat_zero_ma
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15088,7 +15088,7 @@ define <4 x float> @test_4xfloat_masked_
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15111,7 +15111,7 @@ define <4 x float> @test_4xfloat_zero_ma
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15132,7 +15132,7 @@ define <4 x float> @test_4xfloat_masked_
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15155,7 +15155,7 @@ define <4 x float> @test_4xfloat_zero_ma
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15190,7 +15190,7 @@ define <4 x float> @test_4xfloat_masked_
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15213,7 +15213,7 @@ define <4 x float> @test_4xfloat_zero_ma
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15247,7 +15247,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15269,7 +15269,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15288,7 +15288,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15310,7 +15310,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15329,7 +15329,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15351,7 +15351,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15383,7 +15383,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15405,7 +15405,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15438,7 +15438,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15461,7 +15461,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15482,7 +15482,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15505,7 +15505,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15526,7 +15526,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15549,7 +15549,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15584,7 +15584,7 @@ define <8 x float> @test_8xfloat_masked_
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15607,7 +15607,7 @@ define <8 x float> @test_8xfloat_zero_ma
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15641,7 +15641,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15663,7 +15663,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15682,7 +15682,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15704,7 +15704,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15723,7 +15723,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15745,7 +15745,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15777,7 +15777,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15799,7 +15799,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15832,7 +15832,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15855,7 +15855,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15876,7 +15876,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15899,7 +15899,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15920,7 +15920,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15943,7 +15943,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15978,7 +15978,7 @@ define <16 x float> @test_16xfloat_maske
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -16001,7 +16001,7 @@ define <16 x float> @test_16xfloat_zero_
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16035,7 +16035,7 @@ define <2 x double> @test_2xdouble_maske
 ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %xmm4, %xmm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %xmm4, %xmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovapd %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -16057,7 +16057,7 @@ define <2 x double> @test_2xdouble_zero_
 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16076,7 +16076,7 @@ define <2 x double> @test_2xdouble_maske
 ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %xmm4, %xmm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %xmm4, %xmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovapd %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -16098,7 +16098,7 @@ define <2 x double> @test_2xdouble_zero_
 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16131,7 +16131,7 @@ define <2 x double> @test_2xdouble_maske
 ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -16154,7 +16154,7 @@ define <2 x double> @test_2xdouble_zero_
 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16175,7 +16175,7 @@ define <2 x double> @test_2xdouble_maske
 ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %xmm3, %xmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -16198,7 +16198,7 @@ define <2 x double> @test_2xdouble_zero_
 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %xmm2, %xmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %xmm2, %xmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16232,7 +16232,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -16254,7 +16254,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16273,7 +16273,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -16295,7 +16295,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16314,7 +16314,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -16336,7 +16336,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16368,7 +16368,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -16390,7 +16390,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16423,7 +16423,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -16446,7 +16446,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16467,7 +16467,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -16490,7 +16490,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16511,7 +16511,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -16534,7 +16534,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16569,7 +16569,7 @@ define <4 x double> @test_4xdouble_maske
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -16592,7 +16592,7 @@ define <4 x double> @test_4xdouble_zero_
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16626,7 +16626,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -16648,7 +16648,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16667,7 +16667,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -16689,7 +16689,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16708,7 +16708,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -16730,7 +16730,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16762,7 +16762,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [3:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm2, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -16784,7 +16784,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16817,7 +16817,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -16840,7 +16840,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16861,7 +16861,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -16884,7 +16884,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16905,7 +16905,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -16928,7 +16928,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -16963,7 +16963,7 @@ define <8 x double> @test_8xdouble_maske
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -16986,7 +16986,7 @@ define <8 x double> @test_8xdouble_zero_
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpeqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;




More information about the llvm-commits mailing list