[llvm] r330480 - [X86] Add WriteFSign/WriteFLogic scheduler classes

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 20 14:16:06 PDT 2018


Author: rksimon
Date: Fri Apr 20 14:16:05 2018
New Revision: 330480

URL: http://llvm.org/viewvc/llvm-project?rev=330480&view=rev
Log:
[X86] Add WriteFSign/WriteFLogic scheduler classes

Split the scheduler classes for fp and integer vector logical instructions - older CPUs in particular often handled these on different pipes.

This unearthed a few things that are also handled in this patch:

(1) We were tagging AVX512 fp logic ops as WriteFAdd, probably because of the lack of WriteFLogic.
(2) SandyBridge had integer logic ops only using Port5, when as far as I can tell they can use any of Ports 0, 1 and 5 (SBPort015).
(3) Cleaned up x87 FCHS/FABS scheduling as they are typically treated as fp logic ops.

Differential Revision: https://reviews.llvm.org/D45629

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86InstrFPStack.td
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
    llvm/trunk/lib/Target/X86/X86SchedHaswell.td
    llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
    llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
    llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
    llvm/trunk/lib/Target/X86/X86Schedule.td
    llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
    llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
    llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
    llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
    llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
    llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
    llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll
    llvm/trunk/test/CodeGen/X86/mmx-schedule.ll
    llvm/trunk/test/CodeGen/X86/x87-schedule.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=330480&r1=330479&r2=330480&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Fri Apr 20 14:16:05 2018
@@ -5058,10 +5058,10 @@ let isCodeGenOnly = 1 in {
   defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, WriteFCmp, 1>;
   defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, WriteFCmp, 1>;
 }
-defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,  WriteFAdd, 1>;
-defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI, WriteFAdd, 0>;
-defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,  WriteFAdd, 1>;
-defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI, WriteFAdd, 1>;
+defm VAND  : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,  WriteFLogic, 1>;
+defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI, WriteFLogic, 0>;
+defm VOR   : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,  WriteFLogic, 1>;
+defm VXOR  : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI, WriteFLogic, 1>;
 
 // Patterns catch floating point selects with bitcasted integer logic ops.
 multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,

Modified: llvm/trunk/lib/Target/X86/X86InstrFPStack.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFPStack.td?rev=330480&r1=330479&r2=330480&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFPStack.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFPStack.td Fri Apr 20 14:16:05 2018
@@ -309,7 +309,7 @@ def _F     : FPI<0xD9, fp, (outs), (ins)
 
 let Defs = [FPSW] in {
 
-let SchedRW = [WriteVecLogic] in {
+let SchedRW = [WriteFSign] in {
 defm CHS : FPUnary<fneg, MRM_E0, "fchs">;
 defm ABS : FPUnary<fabs, MRM_E1, "fabs">;
 }

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=330480&r1=330479&r2=330480&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Fri Apr 20 14:16:05 2018
@@ -95,14 +95,14 @@ multiclass sse12_fp_packed_logical_rm<bi
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
        pat_rr, d>,
-       Sched<[WriteVecLogic]>;
+       Sched<[WriteFLogic]>;
   let hasSideEffects = 0, mayLoad = 1 in
   def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
        !if(Is2Addr,
            !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
            !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
        pat_rm, d>,
-       Sched<[WriteVecLogicLd, ReadAfterLd]>;
+       Sched<[WriteFLogic.Folded, ReadAfterLd]>;
 }
 
 

Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=330480&r1=330479&r2=330480&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Fri Apr 20 14:16:05 2018
@@ -163,6 +163,8 @@ defm : BWWriteResPair<WriteFSqrt,  [BWPo
 defm : BWWriteResPair<WriteFRcp,   [BWPort0], 5>; // Floating point reciprocal estimate.
 defm : BWWriteResPair<WriteFRsqrt, [BWPort0], 5>; // Floating point reciprocal square root estimate.
 defm : BWWriteResPair<WriteFMA,    [BWPort01], 5>; // Fused Multiply Add.
+defm : BWWriteResPair<WriteFSign,     [BWPort5],  1>; // Floating point fabs/fchs.
+defm : BWWriteResPair<WriteFLogic,    [BWPort5],  1>; // Floating point and/or/xor logicals.
 defm : BWWriteResPair<WriteFShuffle,  [BWPort5],  1>; // Floating point vector shuffles.
 defm : BWWriteResPair<WriteFVarShuffle,  [BWPort5],  1>; // Floating point vector variable shuffles.
 defm : BWWriteResPair<WriteFBlend,  [BWPort015],  1>; // Floating point vector blends.
@@ -177,6 +179,7 @@ def  : WriteRes<WriteVecStore,       [BW
 def  : WriteRes<WriteVecMove,        [BWPort015]>;
 
 defm : BWWriteResPair<WriteVecALU,   [BWPort15],  1>; // Vector integer ALU op, no logicals.
+defm : BWWriteResPair<WriteVecLogic, [BWPort015], 1>; // Vector integer and/or/xor.
 defm : BWWriteResPair<WriteVecShift, [BWPort0],  1>; // Vector integer shifts.
 defm : BWWriteResPair<WriteVecIMul,  [BWPort0],   5>; // Vector integer multiply.
 defm : BWWriteResPair<WritePMULLD,   [BWPort0], 10, [2], 2, 5>; // PMULLD
@@ -187,10 +190,6 @@ defm : BWWriteResPair<WriteVarBlend,  [B
 defm : BWWriteResPair<WriteMPSAD,  [BWPort0, BWPort5], 6, [1, 2]>; // Vector MPSAD.
 defm : BWWriteResPair<WritePSADBW,  [BWPort0],   5>; // Vector PSADBW.
 
-// Vector bitwise operations.
-// These are often used on both floating point and integer vectors.
-defm : BWWriteResPair<WriteVecLogic, [BWPort015], 1>; // Vector and/or/xor.
-
 // Conversion between integer and float.
 defm : BWWriteResPair<WriteCvtF2I, [BWPort1], 3>; // Float -> Integer.
 defm : BWWriteResPair<WriteCvtI2F, [BWPort1], 4>; // Integer -> Float.
@@ -380,10 +379,6 @@ def: InstRW<[BWWriteResGroup3], (instreg
                                            "MMX_PUNPCKLBWirr",
                                            "MMX_PUNPCKLDQirr",
                                            "MMX_PUNPCKLWDirr",
-                                           "(V?)ANDNPD(Y?)rr",
-                                           "(V?)ANDNPS(Y?)rr",
-                                           "(V?)ANDPD(Y?)rr",
-                                           "(V?)ANDPS(Y?)rr",
                                            "VBROADCASTSSrr",
                                            "(V?)INSERTPSrr",
                                            "(V?)MOV64toPQIrr",
@@ -437,9 +432,7 @@ def: InstRW<[BWWriteResGroup3], (instreg
                                            "(V?)UNPCKHPD(Y?)rr",
                                            "(V?)UNPCKHPS(Y?)rr",
                                            "(V?)UNPCKLPD(Y?)rr",
-                                           "(V?)UNPCKLPS(Y?)rr",
-                                           "(V?)XORPD(Y?)rr",
-                                           "(V?)XORPS(Y?)rr")>;
+                                           "(V?)UNPCKLPS(Y?)rr")>;
 
 def BWWriteResGroup4 : SchedWriteRes<[BWPort6]> {
   let Latency = 1;
@@ -552,21 +545,13 @@ def BWWriteResGroup8 : SchedWriteRes<[BW
   let ResourceCycles = [1];
 }
 def: InstRW<[BWWriteResGroup8], (instregex "MMX_MOVQ64rr",
-                                           "MMX_PANDNirr",
-                                           "MMX_PANDirr",
-                                           "MMX_PORirr",
-                                           "MMX_PXORirr",
                                            "(V?)BLENDPD(Y?)rri",
                                            "(V?)BLENDPS(Y?)rri",
                                            "(V?)MOVDQA(Y?)rr",
                                            "(V?)MOVDQU(Y?)rr",
                                            "(V?)MOVPQI2QIrr",
                                            "VMOVZPQILo2PQIrr",
-                                           "(V?)PANDN(Y?)rr",
-                                           "(V?)PAND(Y?)rr",
-                                           "VPBLENDD(Y?)rri",
-                                           "(V?)POR(Y?)rr",
-                                           "(V?)PXOR(Y?)rr")>;
+                                           "VPBLENDD(Y?)rri")>;
 
 def BWWriteResGroup9 : SchedWriteRes<[BWPort0156]> {
   let Latency = 1;
@@ -1238,17 +1223,11 @@ def: InstRW<[BWWriteResGroup61], (instre
                                             "MMX_PUNPCKLBWirm",
                                             "MMX_PUNPCKLDQirm",
                                             "MMX_PUNPCKLWDirm",
-                                            "(V?)ANDNPDrm",
-                                            "(V?)ANDNPSrm",
-                                            "(V?)ANDPDrm",
-                                            "(V?)ANDPSrm",
                                             "(V?)INSERTPSrm",
                                             "(V?)MOVHPDrm",
                                             "(V?)MOVHPSrm",
                                             "(V?)MOVLPDrm",
                                             "(V?)MOVLPSrm",
-                                            "(V?)ORPDrm",
-                                            "(V?)ORPSrm",
                                             "(V?)PACKSSDWrm",
                                             "(V?)PACKSSWBrm",
                                             "(V?)PACKUSDWrm",
@@ -1292,9 +1271,7 @@ def: InstRW<[BWWriteResGroup61], (instre
                                             "(V?)UNPCKHPDrm",
                                             "(V?)UNPCKHPSrm",
                                             "(V?)UNPCKLPDrm",
-                                            "(V?)UNPCKLPSrm",
-                                            "(V?)XORPDrm",
-                                            "(V?)XORPSrm")>;
+                                            "(V?)UNPCKLPSrm")>;
 
 def BWWriteResGroup62 : SchedWriteRes<[BWPort6,BWPort23]> {
   let Latency = 6;
@@ -1387,19 +1364,11 @@ def BWWriteResGroup65 : SchedWriteRes<[B
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[BWWriteResGroup65], (instregex "MMX_PANDNirm",
-                                            "MMX_PANDirm",
-                                            "MMX_PORirm",
-                                            "MMX_PXORirm",
-                                            "(V?)BLENDPDrmi",
+def: InstRW<[BWWriteResGroup65], (instregex "(V?)BLENDPDrmi",
                                             "(V?)BLENDPSrmi",
                                             "VINSERTF128rm",
                                             "VINSERTI128rm",
-                                            "(V?)PANDNrm",
-                                            "(V?)PANDrm",
-                                            "VPBLENDDrmi",
-                                            "(V?)PORrm",
-                                            "(V?)PXORrm")>;
+                                            "VPBLENDDrmi")>;
 
 def BWWriteResGroup66 : SchedWriteRes<[BWPort23,BWPort0156]> {
   let Latency = 6;

Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=330480&r1=330479&r2=330480&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Fri Apr 20 14:16:05 2018
@@ -160,6 +160,8 @@ defm : HWWriteResPair<WriteCvtF2I, [HWPo
 defm : HWWriteResPair<WriteCvtI2F, [HWPort1], 4>;
 defm : HWWriteResPair<WriteCvtF2F, [HWPort1], 3>;
 defm : HWWriteResPair<WriteFMA,   [HWPort01], 5>;
+defm : HWWriteResPair<WriteFSign,  [HWPort0], 1>;
+defm : HWWriteResPair<WriteFLogic, [HWPort5], 1, [1], 1, 6>;
 defm : HWWriteResPair<WriteFShuffle,  [HWPort5],  1>;
 defm : HWWriteResPair<WriteFVarShuffle,  [HWPort5],  1>;
 defm : HWWriteResPair<WriteFBlend,  [HWPort015],  1>;
@@ -173,7 +175,7 @@ def  : WriteRes<WriteVecLoad,        [HW
 def  : WriteRes<WriteVecMove,        [HWPort015]>;
 
 defm : HWWriteResPair<WriteVecShift, [HWPort0],  1>;
-defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1>;
+defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1, [1], 1, 6>;
 defm : HWWriteResPair<WriteVecALU,   [HWPort15],  1>;
 defm : HWWriteResPair<WriteVecIMul,  [HWPort0],   5>;
 defm : HWWriteResPair<WritePMULLD,   [HWPort0], 10, [2], 2, 6>;
@@ -494,12 +496,6 @@ def : InstRW<[HWWriteFRSTOR], (instregex
 
 //-- Arithmetic instructions --//
 
-// FABS.
-def : InstRW<[HWWriteP0], (instregex "ABS_F")>;
-
-// FCHS.
-def : InstRW<[HWWriteP0], (instregex "CHS_F")>;
-
 // FCOMPP FUCOMPP.
 // r.
 def : InstRW<[HWWrite2P01], (instregex "FCOMPP", "UCOM_FPPr")>;
@@ -737,10 +733,6 @@ def: InstRW<[HWWriteResGroup4], (instreg
                                            "MMX_PUNPCKLBWirr",
                                            "MMX_PUNPCKLDQirr",
                                            "MMX_PUNPCKLWDirr",
-                                           "(V?)ANDNPD(Y?)rr",
-                                           "(V?)ANDNPS(Y?)rr",
-                                           "(V?)ANDPD(Y?)rr",
-                                           "(V?)ANDPS(Y?)rr",
                                            "VBROADCASTSSrr",
                                            "(V?)INSERTPSrr",
                                            "(V?)MOV64toPQIrr",
@@ -756,8 +748,6 @@ def: InstRW<[HWWriteResGroup4], (instreg
                                            "(V?)MOVSSrr",
                                            "(V?)MOVUPD(Y?)rr",
                                            "(V?)MOVUPS(Y?)rr",
-                                           "(V?)ORPD(Y?)rr",
-                                           "(V?)ORPS(Y?)rr",
                                            "(V?)PACKSSDW(Y?)rr",
                                            "(V?)PACKSSWB(Y?)rr",
                                            "(V?)PACKUSDW(Y?)rr",
@@ -798,9 +788,7 @@ def: InstRW<[HWWriteResGroup4], (instreg
                                            "(V?)UNPCKHPD(Y?)rr",
                                            "(V?)UNPCKHPS(Y?)rr",
                                            "(V?)UNPCKLPD(Y?)rr",
-                                           "(V?)UNPCKLPS(Y?)rr",
-                                           "(V?)XORPD(Y?)rr",
-                                           "(V?)XORPS(Y?)rr")>;
+                                           "(V?)UNPCKLPS(Y?)rr")>;
 
 def HWWriteResGroup5 : SchedWriteRes<[HWPort6]> {
   let Latency = 1;
@@ -925,11 +913,7 @@ def: InstRW<[HWWriteResGroup9], (instreg
                                            "(V?)MOVDQU(Y?)rr",
                                            "(V?)MOVPQI2QIrr",
                                            "VMOVZPQILo2PQIrr",
-                                           "(V?)PANDN(Y?)rr",
-                                           "(V?)PAND(Y?)rr",
-                                           "VPBLENDD(Y?)rri",
-                                           "(V?)POR(Y?)rr",
-                                           "(V?)PXOR(Y?)rr")>;
+                                           "VPBLENDD(Y?)rri")>;
 
 def HWWriteResGroup10 : SchedWriteRes<[HWPort0156]> {
   let Latency = 1;
@@ -1041,13 +1025,7 @@ def HWWriteResGroup13 : SchedWriteRes<[H
   let ResourceCycles = [1,1];
 }
 def: InstRW<[HWWriteResGroup13], (instregex "PUNPCKLWDrm",
-                                            "(V?)ANDNPDrm",
-                                            "(V?)ANDNPSrm",
-                                            "(V?)ANDPDrm",
-                                            "(V?)ANDPSrm",
                                             "(V?)INSERTPSrm",
-                                            "(V?)ORPDrm",
-                                            "(V?)ORPSrm",
                                             "(V?)PACKSSDWrm",
                                             "(V?)PACKSSWBrm",
                                             "(V?)PACKUSDWrm",
@@ -1075,9 +1053,7 @@ def: InstRW<[HWWriteResGroup13], (instre
                                             "(V?)UNPCKHPDrm",
                                             "(V?)UNPCKHPSrm",
                                             "(V?)UNPCKLPDrm",
-                                            "(V?)UNPCKLPSrm",
-                                            "(V?)XORPDrm",
-                                            "(V?)XORPSrm")>;
+                                            "(V?)UNPCKLPSrm")>;
 
 def HWWriteResGroup13_1 : SchedWriteRes<[HWPort5,HWPort23]> {
   let Latency = 8;
@@ -1310,11 +1286,7 @@ def: InstRW<[HWWriteResGroup17], (instre
                                             "(V?)BLENDPSrmi",
                                             "VINSERTF128rm",
                                             "VINSERTI128rm",
-                                            "(V?)PANDNrm",
-                                            "(V?)PANDrm",
-                                            "VPBLENDDrmi",
-                                            "(V?)PORrm",
-                                            "(V?)PXORrm")>;
+                                            "VPBLENDDrmi")>;
 
 def HWWriteResGroup17_1 : SchedWriteRes<[HWPort23,HWPort015]> {
   let Latency = 6;

Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=330480&r1=330479&r2=330480&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Fri Apr 20 14:16:05 2018
@@ -149,6 +149,8 @@ defm : SBWriteResPair<WriteFSqrt,  [SBPo
 defm : SBWriteResPair<WriteCvtF2I, [SBPort1], 3>;
 defm : SBWriteResPair<WriteCvtI2F, [SBPort1], 4>;
 defm : SBWriteResPair<WriteCvtF2F, [SBPort1], 3>;
+defm : SBWriteResPair<WriteFSign,    [SBPort5], 1>;
+defm : SBWriteResPair<WriteFLogic,   [SBPort5], 1, [1], 1, 6>;
 defm : SBWriteResPair<WriteFShuffle, [SBPort5],  1>;
 defm : SBWriteResPair<WriteFVarShuffle, [SBPort5],  1>;
 defm : SBWriteResPair<WriteFBlend,  [SBPort05],  1>;
@@ -160,7 +162,7 @@ def  : WriteRes<WriteVecLoad,        [SB
 def  : WriteRes<WriteVecMove,        [SBPort05]>;
 
 defm : SBWriteResPair<WriteVecShift, [SBPort5], 1>;
-defm : SBWriteResPair<WriteVecLogic, [SBPort5], 1>;
+defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 6>;
 defm : SBWriteResPair<WriteVecALU,   [SBPort1], 3>;
 defm : SBWriteResPair<WriteVecIMul,  [SBPort0], 5>;
 defm : SBWriteResPair<WritePMULLD,   [SBPort0], 5, [1], 1, 6>; // TODO this is probably wrong for 256/512-bit for the "generic" model
@@ -451,11 +453,7 @@ def: InstRW<[SBWriteResGroup6], (instreg
                                            "MOVDQArr", //TODO: Why are these separated from their VEX equivalent
                                            "MOVDQUrr", // TODO: Why are these separated from their VEX equivalent
                                            "(V?)MOVPQI2QIrr",
-                                           "(V?)MOVZPQILo2PQIrr",
-                                           "(V?)PANDNrr",
-                                           "(V?)PANDrr",
-                                           "(V?)PORrr",
-                                           "(V?)PXORrr")>;
+                                           "(V?)MOVZPQILo2PQIrr")>;
 
 def SBWriteResGroup7 : SchedWriteRes<[SBPort0]> {
   let Latency = 2;
@@ -967,7 +965,11 @@ def SBWriteResGroup52 : SchedWriteRes<[S
   let ResourceCycles = [1,1];
 }
 def: InstRW<[SBWriteResGroup52], (instregex "LODSL",
-                                            "LODSQ")>;
+                                            "LODSQ",
+                                            "MMX_PANDirm",
+                                            "MMX_PANDNirm",
+                                            "MMX_PORirm",
+                                            "MMX_PXORirm")>;
 
 def SBWriteResGroup53 : SchedWriteRes<[SBPort4,SBPort23]> {
   let Latency = 6;
@@ -1012,18 +1014,12 @@ def SBWriteResGroup56 : SchedWriteRes<[S
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SBWriteResGroup56], (instregex "(V?)ANDNPDrm",
-                                            "(V?)ANDNPSrm",
-                                            "(V?)ANDPDrm",
-                                            "(V?)ANDPSrm",
-                                            "VBROADCASTF128",
+def: InstRW<[SBWriteResGroup56], (instregex "VBROADCASTF128",
                                             "(V?)INSERTPSrm",
                                             "(V?)MOVHPDrm",
                                             "(V?)MOVHPSrm",
                                             "(V?)MOVLPDrm",
                                             "(V?)MOVLPSrm",
-                                            "(V?)ORPDrm",
-                                            "(V?)ORPSrm",
                                             "VPERMILPDmi",
                                             "VPERMILPDrm",
                                             "VPERMILPSmi",
@@ -1033,9 +1029,7 @@ def: InstRW<[SBWriteResGroup56], (instre
                                             "(V?)UNPCKHPDrm",
                                             "(V?)UNPCKHPSrm",
                                             "(V?)UNPCKLPDrm",
-                                            "(V?)UNPCKLPSrm",
-                                            "(V?)XORPDrm",
-                                            "(V?)XORPSrm")>;
+                                            "(V?)UNPCKLPSrm")>;
 
 def SBWriteResGroup58 : SchedWriteRes<[SBPort23,SBPort05]> {
   let Latency = 7;
@@ -1130,16 +1124,6 @@ def: InstRW<[SBWriteResGroup59], (instre
                                             "(V?)PUNPCKLQDQrm",
                                             "(V?)PUNPCKLWDrm")>;
 
-def SBWriteResGroup60 : SchedWriteRes<[SBPort23,SBPort015]> {
-  let Latency = 7;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup60], (instregex "(V?)PANDNrm",
-                                            "(V?)PANDrm",
-                                            "(V?)PORrm",
-                                            "(V?)PXORrm")>;
-
 def SBWriteResGroup61 : SchedWriteRes<[SBPort0,SBPort05]> {
   let Latency = 7;
   let NumMicroOps = 3;

Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=330480&r1=330479&r2=330480&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Fri Apr 20 14:16:05 2018
@@ -160,6 +160,8 @@ defm : SKLWriteResPair<WriteFSqrt,  [SKL
 defm : SKLWriteResPair<WriteFRcp,   [SKLPort0], 5>; // Floating point reciprocal estimate.
 defm : SKLWriteResPair<WriteFRsqrt, [SKLPort0], 5>; // Floating point reciprocal square root estimate.
 defm : SKLWriteResPair<WriteFMA,    [SKLPort01], 4>; // Fused Multiply Add.
+defm : SKLWriteResPair<WriteFSign,   [SKLPort0], 1>; // Floating point fabs/fchs.
+defm : SKLWriteResPair<WriteFLogic, [SKLPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
 defm : SKLWriteResPair<WriteFShuffle,  [SKLPort5],  1>; // Floating point vector shuffles.
 defm : SKLWriteResPair<WriteFVarShuffle,  [SKLPort5],  1>; // Floating point vector shuffles.
 defm : SKLWriteResPair<WriteFBlend,  [SKLPort015],  1>; // Floating point vector blends.
@@ -174,6 +176,7 @@ def  : WriteRes<WriteVecStore,        [S
 def  : WriteRes<WriteVecMove,         [SKLPort015]>;
 
 defm : SKLWriteResPair<WriteVecALU,   [SKLPort15],  1>; // Vector integer ALU op, no logicals.
+defm : SKLWriteResPair<WriteVecLogic, [SKLPort015], 1, [1], 1, 6>; // Vector integer and/or/xor.
 defm : SKLWriteResPair<WriteVecShift, [SKLPort0],  1>; // Vector integer shifts.
 defm : SKLWriteResPair<WriteVecIMul,  [SKLPort0],   5>; // Vector integer multiply.
 defm : SKLWriteResPair<WritePMULLD,   [SKLPort01], 10, [2], 2, 6>;
@@ -184,10 +187,6 @@ defm : SKLWriteResPair<WriteVarBlend,  [
 defm : SKLWriteResPair<WriteMPSAD,  [SKLPort0, SKLPort5], 6, [1, 2]>; // Vector MPSAD.
 defm : SKLWriteResPair<WritePSADBW, [SKLPort5], 3>; // Vector PSADBW.
 
-// Vector bitwise operations.
-// These are often used on both floating point and integer vectors.
-defm : SKLWriteResPair<WriteVecLogic, [SKLPort015], 1>; // Vector and/or/xor.
-
 // Conversion between integer and float.
 defm : SKLWriteResPair<WriteCvtF2I, [SKLPort1], 3>; // Float -> Integer.
 defm : SKLWriteResPair<WriteCvtI2F, [SKLPort1], 4>; // Integer -> Float.
@@ -565,11 +564,7 @@ def SKLWriteResGroup9 : SchedWriteRes<[S
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[SKLWriteResGroup9], (instregex "(V?)ANDNPD(Y?)rr",
-                                            "(V?)ANDNPS(Y?)rr",
-                                            "(V?)ANDPD(Y?)rr",
-                                            "(V?)ANDPS(Y?)rr",
-                                            "(V?)BLENDPD(Y?)rri",
+def: InstRW<[SKLWriteResGroup9], (instregex "(V?)BLENDPD(Y?)rri",
                                             "(V?)BLENDPS(Y?)rri",
                                             "(V?)MOVAPD(Y?)rr",
                                             "(V?)MOVAPS(Y?)rr",
@@ -579,23 +574,15 @@ def: InstRW<[SKLWriteResGroup9], (instre
                                             "(V?)MOVUPD(Y?)rr",
                                             "(V?)MOVUPS(Y?)rr",
                                             "(V?)MOVZPQILo2PQIrr",
-                                            "(V?)ORPD(Y?)rr",
-                                            "(V?)ORPS(Y?)rr",
                                             "(V?)PADDB(Y?)rr",
                                             "(V?)PADDD(Y?)rr",
                                             "(V?)PADDQ(Y?)rr",
                                             "(V?)PADDW(Y?)rr",
-                                            "(V?)PANDN(Y?)rr",
-                                            "(V?)PAND(Y?)rr",
                                             "VPBLENDD(Y?)rri",
-                                            "(V?)POR(Y?)rr",
                                             "(V?)PSUBB(Y?)rr",
                                             "(V?)PSUBD(Y?)rr",
                                             "(V?)PSUBQ(Y?)rr",
-                                            "(V?)PSUBW(Y?)rr",
-                                            "(V?)PXOR(Y?)rr",
-                                            "(V?)XORPD(Y?)rr",
-                                            "(V?)XORPS(Y?)rr")>;
+                                            "(V?)PSUBW(Y?)rr")>;
 
 def SKLWriteResGroup10 : SchedWriteRes<[SKLPort0156]> {
   let Latency = 1;
@@ -1605,35 +1592,23 @@ def SKLWriteResGroup91 : SchedWriteRes<[
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SKLWriteResGroup91], (instregex "(V?)ANDNPDrm",
-                                             "(V?)ANDNPSrm",
-                                             "(V?)ANDPDrm",
-                                             "(V?)ANDPSrm",
-                                             "(V?)BLENDPDrmi",
+def: InstRW<[SKLWriteResGroup91], (instregex "(V?)BLENDPDrmi",
                                              "(V?)BLENDPSrmi",
                                              "(V?)INSERTF128rm",
                                              "(V?)INSERTI128rm",
                                              "(V?)MASKMOVPDrm",
                                              "(V?)MASKMOVPSrm",
-                                             "(V?)ORPDrm",
-                                             "(V?)ORPSrm",
                                              "(V?)PADDBrm",
                                              "(V?)PADDDrm",
                                              "(V?)PADDQrm",
                                              "(V?)PADDWrm",
-                                             "(V?)PANDNrm",
-                                             "(V?)PANDrm",
                                              "(V?)PBLENDDrmi",
                                              "(V?)PMASKMOVDrm",
                                              "(V?)PMASKMOVQrm",
-                                             "(V?)PORrm",
                                              "(V?)PSUBBrm",
                                              "(V?)PSUBDrm",
                                              "(V?)PSUBQrm",
-                                             "(V?)PSUBWrm",
-                                             "(V?)PXORrm",
-                                             "(V?)XORPDrm",
-                                             "(V?)XORPSrm")>;
+                                             "(V?)PSUBWrm")>;
 
 def SKLWriteResGroup92 : SchedWriteRes<[SKLPort5,SKLPort23]> {
   let Latency = 7;

Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=330480&r1=330479&r2=330480&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Fri Apr 20 14:16:05 2018
@@ -160,6 +160,8 @@ defm : SKXWriteResPair<WriteFSqrt,  [SKX
 defm : SKXWriteResPair<WriteFRcp,   [SKXPort0], 5>; // Floating point reciprocal estimate.
 defm : SKXWriteResPair<WriteFRsqrt, [SKXPort0], 5>; // Floating point reciprocal square root estimate.
 defm : SKXWriteResPair<WriteFMA,  [SKXPort015], 4>; // Fused Multiply Add.
+defm : SKXWriteResPair<WriteFSign,  [SKXPort0], 1>; // Floating point fabs/fchs.
+defm : SKXWriteResPair<WriteFLogic, [SKXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
 defm : SKXWriteResPair<WriteFShuffle,  [SKXPort5],  1>; // Floating point vector shuffles.
 defm : SKXWriteResPair<WriteFVarShuffle,  [SKXPort5],  1>; // Floating point vector variable shuffles.
 defm : SKXWriteResPair<WriteFBlend,  [SKXPort015],  1>; // Floating point vector blends.
@@ -174,6 +176,7 @@ def  : WriteRes<WriteVecStore,        [S
 def  : WriteRes<WriteVecMove,         [SKXPort015]>;
 
 defm : SKXWriteResPair<WriteVecALU,   [SKXPort15],  1>; // Vector integer ALU op, no logicals.
+defm : SKXWriteResPair<WriteVecLogic, [SKXPort015], 1, [1], 1, 6>; // Vector integer and/or/xor.
 defm : SKXWriteResPair<WriteVecShift, [SKXPort0],  1>; // Vector integer shifts.
 defm : SKXWriteResPair<WriteVecIMul,  [SKXPort0],   5>; // Vector integer multiply.
 defm : SKXWriteResPair<WritePMULLD,   [SKXPort015], 10, [2], 2, 6>; // Vector integer multiply.
@@ -184,10 +187,6 @@ defm : SKXWriteResPair<WriteVarBlend,  [
 defm : SKXWriteResPair<WriteMPSAD,  [SKXPort0, SKXPort5], 6, [1, 2]>; // Vector MPSAD.
 defm : SKXWriteResPair<WritePSADBW, [SKXPort5], 3>; // Vector PSADBW.
 
-// Vector bitwise operations.
-// These are often used on both floating point and integer vectors.
-defm : SKXWriteResPair<WriteVecLogic, [SKXPort015], 1>; // Vector and/or/xor.
-
 // Conversion between integer and float.
 defm : SKXWriteResPair<WriteCvtF2I, [SKXPort1], 3>; // Float -> Integer.
 defm : SKXWriteResPair<WriteCvtI2F, [SKXPort1], 4>; // Integer -> Float.
@@ -1026,11 +1025,7 @@ def SKXWriteResGroup9 : SchedWriteRes<[S
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[SKXWriteResGroup9], (instregex "ANDNPDrr",
-                                            "ANDNPSrr",
-                                            "ANDPDrr",
-                                            "ANDPSrr",
-                                            "BLENDPDrri",
+def: InstRW<[SKXWriteResGroup9], (instregex "BLENDPDrri",
                                             "BLENDPSrri",
                                             "MOVAPDrr",
                                             "MOVAPSrr",
@@ -1039,40 +1034,14 @@ def: InstRW<[SKXWriteResGroup9], (instre
                                             "MOVPQI2QIrr",
                                             "MOVUPDrr",
                                             "MOVUPSrr",
-                                            "ORPDrr",
-                                            "ORPSrr",
                                             "PADDBrr",
                                             "PADDDrr",
                                             "PADDQrr",
                                             "PADDWrr",
-                                            "PANDNrr",
-                                            "PANDrr",
-                                            "PORrr",
                                             "PSUBBrr",
                                             "PSUBDrr",
                                             "PSUBQrr",
                                             "PSUBWrr",
-                                            "PXORrr",
-                                            "VANDNPDYrr",
-                                            "VANDNPDZ128rr",
-                                            "VANDNPDZ256rr",
-                                            "VANDNPDZrr",
-                                            "VANDNPDrr",
-                                            "VANDNPSYrr",
-                                            "VANDNPSZ128rr",
-                                            "VANDNPSZ256rr",
-                                            "VANDNPSZrr",
-                                            "VANDNPSrr",
-                                            "VANDPDYrr",
-                                            "VANDPDZ128rr",
-                                            "VANDPDZ256rr",
-                                            "VANDPDZrr",
-                                            "VANDPDrr",
-                                            "VANDPSYrr",
-                                            "VANDPSZ128rr",
-                                            "VANDPSZ256rr",
-                                            "VANDPSZrr",
-                                            "VANDPSrr",
                                             "VBLENDMPDZ128rr",
                                             "VBLENDMPDZ256rr",
                                             "VBLENDMPDZrr",
@@ -1128,16 +1097,6 @@ def: InstRW<[SKXWriteResGroup9], (instre
                                             "VMOVUPSYrr",
                                             "VMOVUPSrr",
                                             "VMOVZPQILo2PQIrr",
-                                            "VORPDYrr",
-                                            "VORPDZ128rr",
-                                            "VORPDZ256rr",
-                                            "VORPDZrr",
-                                            "VORPDrr",
-                                            "VORPSYrr",
-                                            "VORPSZ128rr",
-                                            "VORPSZ256rr",
-                                            "VORPSZrr",
-                                            "VORPSrr",
                                             "VPADDBYrr",
                                             "VPADDBZ128rr",
                                             "VPADDBZ256rr",
@@ -1158,22 +1117,6 @@ def: InstRW<[SKXWriteResGroup9], (instre
                                             "VPADDWZ256rr",
                                             "VPADDWZrr",
                                             "VPADDWrr",
-                                            "VPANDDZ128rr",
-                                            "VPANDDZ256rr",
-                                            "VPANDDZrr",
-                                            "VPANDNDZ128rr",
-                                            "VPANDNDZ256rr",
-                                            "VPANDNDZrr",
-                                            "VPANDNQZ128rr",
-                                            "VPANDNQZ256rr",
-                                            "VPANDNQZrr",
-                                            "VPANDNYrr",
-                                            "VPANDNrr",
-                                            "VPANDQZ128rr",
-                                            "VPANDQZ256rr",
-                                            "VPANDQZrr",
-                                            "VPANDYrr",
-                                            "VPANDrr",
                                             "VPBLENDDYrri",
                                             "VPBLENDDrri",
                                             "VPBLENDMBZ128rr",
@@ -1188,14 +1131,6 @@ def: InstRW<[SKXWriteResGroup9], (instre
                                             "VPBLENDMWZ128rr",
                                             "VPBLENDMWZ256rr",
                                             "VPBLENDMWZrr",
-                                            "VPORDZ128rr",
-                                            "VPORDZ256rr",
-                                            "VPORDZrr",
-                                            "VPORQZ128rr",
-                                            "VPORQZ256rr",
-                                            "VPORQZrr",
-                                            "VPORYrr",
-                                            "VPORrr",
                                             "VPSUBBYrr",
                                             "VPSUBBZ128rr",
                                             "VPSUBBZ256rr",
@@ -1220,27 +1155,7 @@ def: InstRW<[SKXWriteResGroup9], (instre
                                             "VPTERNLOGDZrri",
                                             "VPTERNLOGQZ128rri",
                                             "VPTERNLOGQZ256rri",
-                                            "VPTERNLOGQZrri",
-                                            "VPXORDZ128rr",
-                                            "VPXORDZ256rr",
-                                            "VPXORDZrr",
-                                            "VPXORQZ128rr",
-                                            "VPXORQZ256rr",
-                                            "VPXORQZrr",
-                                            "VPXORYrr",
-                                            "VPXORrr",
-                                            "VXORPDYrr",
-                                            "VXORPDZ128rr",
-                                            "VXORPDZ256rr",
-                                            "VXORPDZrr",
-                                            "VXORPDrr",
-                                            "VXORPSYrr",
-                                            "VXORPSZ128rr",
-                                            "VXORPSZ256rr",
-                                            "VXORPSZrr",
-                                            "VXORPSrr",
-                                            "XORPDrr",
-                                            "XORPSrr")>;
+                                            "VPTERNLOGQZrri")>;
 
 def SKXWriteResGroup10 : SchedWriteRes<[SKXPort0156]> {
   let Latency = 1;
@@ -3346,34 +3261,16 @@ def SKXWriteResGroup95 : SchedWriteRes<[
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SKXWriteResGroup95], (instregex "ANDNPDrm",
-                                             "ANDNPSrm",
-                                             "ANDPDrm",
-                                             "ANDPSrm",
-                                             "BLENDPDrmi",
+def: InstRW<[SKXWriteResGroup95], (instregex "BLENDPDrmi",
                                              "BLENDPSrmi",
-                                             "ORPDrm",
-                                             "ORPSrm",
                                              "PADDBrm",
                                              "PADDDrm",
                                              "PADDQrm",
                                              "PADDWrm",
-                                             "PANDNrm",
-                                             "PANDrm",
-                                             "PORrm",
                                              "PSUBBrm",
                                              "PSUBDrm",
                                              "PSUBQrm",
                                              "PSUBWrm",
-                                             "PXORrm",
-                                             "VANDNPDZ128rm(b?)",
-                                             "VANDNPDrm",
-                                             "VANDNPSZ128rm(b?)",
-                                             "VANDNPSrm",
-                                             "VANDPDZ128rm(b?)",
-                                             "VANDPDrm",
-                                             "VANDPSZ128rm(b?)",
-                                             "VANDPSrm",
                                              "VBLENDMPDZ128rm(b?)",
                                              "VBLENDMPSZ128rm(b?)",
                                              "VBLENDPDrmi",
@@ -3398,10 +3295,6 @@ def: InstRW<[SKXWriteResGroup95], (instr
                                              "VMOVSLDUPZ128rm(b?)",
                                              "VMOVUPDZ128rm(b?)",
                                              "VMOVUPSZ128rm(b?)",
-                                             "VORPDZ128rm(b?)",
-                                             "VORPDrm",
-                                             "VORPSZ128rm(b?)",
-                                             "VORPSrm",
                                              "VPADDBZ128rm(b?)",
                                              "VPADDBrm",
                                              "VPADDDZ128rm(b?)",
@@ -3410,12 +3303,6 @@ def: InstRW<[SKXWriteResGroup95], (instr
                                              "VPADDQrm",
                                              "VPADDWZ128rm(b?)",
                                              "VPADDWrm",
-                                             "VPANDDZ128rm(b?)",
-                                             "VPANDNDZ128rm(b?)",
-                                             "VPANDNQZ128rm(b?)",
-                                             "VPANDNrm",
-                                             "VPANDQZ128rm(b?)",
-                                             "VPANDrm",
                                              "VPBLENDDrmi",
                                              "VPBLENDMBZ128rm(b?)",
                                              "VPBLENDMDZ128rm(b?)",
@@ -3425,8 +3312,6 @@ def: InstRW<[SKXWriteResGroup95], (instr
                                              "VPBROADCASTQZ128m(b?)",
                                              "VPMASKMOVDrm",
                                              "VPMASKMOVQrm",
-                                             "VPORDZ128rm(b?)",
-                                             "VPORQZ128rm(b?)",
                                              "VPORrm",
                                              "VPSUBBZ128rm(b?)",
                                              "VPSUBBrm",
@@ -3437,16 +3322,7 @@ def: InstRW<[SKXWriteResGroup95], (instr
                                              "VPSUBWZ128rm(b?)",
                                              "VPSUBWrm",
                                              "VPTERNLOGDZ128rm(b?)i",
-                                             "VPTERNLOGQZ128rm(b?)i",
-                                             "VPXORDZ128rm(b?)",
-                                             "VPXORQZ128rm(b?)",
-                                             "VPXORrm",
-                                             "VXORPDZ128rm(b?)",
-                                             "VXORPDrm",
-                                             "VXORPSZ128rm(b?)",
-                                             "VXORPSrm",
-                                             "XORPDrm",
-                                             "XORPSrm")>;
+                                             "VPTERNLOGQZ128rm(b?)i")>;
 
 def SKXWriteResGroup96 : SchedWriteRes<[SKXPort5,SKXPort23]> {
   let Latency = 7;

Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=330480&r1=330479&r2=330480&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Fri Apr 20 14:16:05 2018
@@ -87,6 +87,8 @@ defm WriteFSqrt  : X86SchedWritePair; //
 defm WriteFRcp   : X86SchedWritePair; // Floating point reciprocal estimate.
 defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate.
 defm WriteFMA    : X86SchedWritePair; // Fused Multiply Add.
+defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs.
+defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals.
 defm WriteFShuffle  : X86SchedWritePair; // Floating point vector shuffles.
 defm WriteFVarShuffle  : X86SchedWritePair; // Floating point vector variable shuffles.
 defm WriteFBlend  : X86SchedWritePair; // Floating point vector blends.
@@ -104,6 +106,7 @@ def  WriteVecLoad  : SchedWrite;
 def  WriteVecStore : SchedWrite;
 def  WriteVecMove  : SchedWrite;
 defm WriteVecALU   : X86SchedWritePair; // Vector integer ALU op, no logicals.
+defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals.
 defm WriteVecShift : X86SchedWritePair; // Vector integer shifts.
 defm WriteVecIMul  : X86SchedWritePair; // Vector integer multiply.
 defm WritePMULLD : X86SchedWritePair; // PMULLD
@@ -114,10 +117,6 @@ defm WriteVarBlend  : X86SchedWritePair;
 defm WritePSADBW : X86SchedWritePair; // Vector PSADBW.
 defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD.
 
-// Vector bitwise operations.
-// These are often used on both floating point and integer vectors.
-defm WriteVecLogic : X86SchedWritePair; // Vector and/or/xor.
-
 // MOVMSK operations.
 def WriteFMOVMSK : SchedWrite;
 def WriteVecMOVMSK : SchedWrite;

Modified: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleAtom.td?rev=330480&r1=330479&r2=330480&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td Fri Apr 20 14:16:05 2018
@@ -210,6 +210,8 @@ defm : AtomWriteResPair<WriteFRcp,
 defm : AtomWriteResPair<WriteFRsqrt,         [AtomPort0],  [AtomPort0],  4,  4,  [4],  [4]>;
 defm : AtomWriteResPair<WriteFDiv,          [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
 defm : AtomWriteResPair<WriteFSqrt,         [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
+defm : AtomWriteResPair<WriteFSign,          [AtomPort1],  [AtomPort1]>;
+defm : AtomWriteResPair<WriteFLogic,        [AtomPort01],  [AtomPort0]>;
 defm : AtomWriteResPair<WriteFShuffle,       [AtomPort0],  [AtomPort0]>;
 defm : AtomWriteResPair<WriteFVarShuffle,    [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
 defm : AtomWriteResPair<WriteFMA,            [AtomPort0],  [AtomPort0]>; // NOTE: Doesn't exist on Atom.
@@ -351,8 +353,7 @@ def AtomWrite1_1 : SchedWriteRes<[AtomPo
   let ResourceCycles = [1];
 }
 def : InstRW<[AtomWrite1_1], (instrs FCOMPP)>;
-def : InstRW<[AtomWrite1_1], (instregex "ABS_F", "CHS_F",
-                                        "UCOM_F(P|PP)?r",
+def : InstRW<[AtomWrite1_1], (instregex "UCOM_F(P|PP)?r",
                                         "BT(C|R|S)?(16|32|64)(rr|ri8)")>;
 
 def AtomWrite1_5 : SchedWriteRes<[AtomPort1]> {

Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=330480&r1=330479&r2=330480&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Fri Apr 20 14:16:05 2018
@@ -300,6 +300,8 @@ defm : JWriteResFpuPair<WriteFRcp,
 defm : JWriteResFpuPair<WriteFRsqrt,       [JFPU1, JFPM],  2>;
 defm : JWriteResFpuPair<WriteFDiv,         [JFPU1, JFPM], 19, [1, 19]>;
 defm : JWriteResFpuPair<WriteFSqrt,        [JFPU1, JFPM], 21, [1, 21]>;
+defm : JWriteResFpuPair<WriteFSign,        [JFPU1, JFPM],  2>;
+defm : JWriteResFpuPair<WriteFLogic,      [JFPU01, JFPX],  1>;
 defm : JWriteResFpuPair<WriteFShuffle,    [JFPU01, JFPX],  1>;
 defm : JWriteResFpuPair<WriteFVarShuffle, [JFPU01, JFPX],  2, [1, 4], 3>;
 defm : JWriteResFpuPair<WriteFBlend,      [JFPU01, JFPX],  1>;
@@ -532,25 +534,6 @@ def : InstRW<[JWriteCVTPH2PSYLd], (instr
 // AVX instructions.
 ////////////////////////////////////////////////////////////////////////////////
 
-def JWriteFLogic: SchedWriteRes<[JFPU01, JFPX]> {
-}
-def : InstRW<[JWriteFLogic], (instrs ORPDrr, ORPSrr, VORPDrr, VORPSrr,
-                                     XORPDrr, XORPSrr, VXORPDrr, VXORPSrr,
-                                     ANDPDrr, ANDPSrr, VANDPDrr, VANDPSrr,
-                                     ANDNPDrr, ANDNPSrr, VANDNPDrr, VANDNPSrr)>;
-
-def JWriteFLogicLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
-  let Latency = 6;
-}
-def : InstRW<[JWriteFLogicLd, ReadAfterLd], (instrs ORPDrm, ORPSrm,
-                                                    VORPDrm, VORPSrm,
-                                                    XORPDrm, XORPSrm,
-                                                    VXORPDrm, VXORPSrm,
-                                                    ANDPDrm, ANDPSrm,
-                                                    VANDPDrm, VANDPSrm,
-                                                    ANDNPDrm, ANDNPSrm,
-                                                    VANDNPDrm, VANDNPSrm)>;
-
 def JWriteFLogicY: SchedWriteRes<[JFPU01, JFPX]> {
   let ResourceCycles = [2, 2];
   let NumMicroOps = 2;

Modified: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=330480&r1=330479&r2=330480&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Fri Apr 20 14:16:05 2018
@@ -135,6 +135,8 @@ defm : SLMWriteResPair<WriteFSqrt,  [SLM
 defm : SLMWriteResPair<WriteCvtF2I, [SLM_FPC_RSV01], 4>;
 defm : SLMWriteResPair<WriteCvtI2F, [SLM_FPC_RSV01], 4>;
 defm : SLMWriteResPair<WriteCvtF2F, [SLM_FPC_RSV01], 4>;
+defm : SLMWriteResPair<WriteFSign,  [SLM_FPC_RSV01], 1>;
+defm : SLMWriteResPair<WriteFLogic, [SLM_FPC_RSV01], 1>;
 defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0],  1>;
 defm : SLMWriteResPair<WriteFBlend,  [SLM_FPC_RSV0],  1>;

Modified: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td?rev=330480&r1=330479&r2=330480&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td Fri Apr 20 14:16:05 2018
@@ -202,6 +202,8 @@ defm : ZnWriteResFpuPair<WriteCvtI2F,
 defm : ZnWriteResFpuPair<WriteCvtF2F,    [ZnFPU3],  5>;
 defm : ZnWriteResFpuPair<WriteCvtF2I,    [ZnFPU3],  5>;
 defm : ZnWriteResFpuPair<WriteFDiv,      [ZnFPU3], 15>;
+defm : ZnWriteResFpuPair<WriteFSign,     [ZnFPU3],  2>;
+defm : ZnWriteResFpuPair<WriteFLogic,    [ZnFPU],   1>;
 defm : ZnWriteResFpuPair<WriteFShuffle,  [ZnFPU12], 1>;
 defm : ZnWriteResFpuPair<WriteFVarShuffle, [ZnFPU12], 1>;
 defm : ZnWriteResFpuPair<WriteFMul,      [ZnFPU0],  5>;
@@ -778,10 +780,6 @@ def : InstRW<[WriteMicrocoded], (instreg
 
 //-- Arithmetic instructions --//
 
-def ZnWriteFPU3Lat2 : SchedWriteRes<[ZnFPU3]> {
-  let Latency = 2;
-}
-
 def ZnWriteFPU3Lat1 : SchedWriteRes<[ZnFPU3]> ;
 
 def ZnWriteFPU0Lat1 : SchedWriteRes<[ZnFPU0]> ;
@@ -790,9 +788,6 @@ def ZnWriteFPU0Lat1Ld : SchedWriteRes<[Z
   let Latency = 8;
 }
 
-// FABS.
-def : InstRW<[ZnWriteFPU3Lat2], (instregex "ABS_F")>;
-
 // FCHS.
 def : InstRW<[ZnWriteFPU3Lat1], (instregex "CHS_F")>;
 
@@ -1672,15 +1667,6 @@ def ZnWriteRSQRTPSYLd : SchedWriteRes<[Z
 }
 def : InstRW<[ZnWriteRSQRTPSYLd], (instregex "VRSQRTPSYm")>;
 
-//-- Logic instructions --//
-
-// AND, ANDN, OR, XOR PS/PD.
-// x,x / v,v,v.
-def : InstRW<[WriteVecLogic], (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rr")>;
-// x,m / v,v,m.
-def : InstRW<[WriteVecLogicLd],
-                         (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rm")>;
-
 //-- Other instructions --//
 
 // VZEROUPPER.

Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=330480&r1=330479&r2=330480&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Fri Apr 20 14:16:05 2018
@@ -654,7 +654,7 @@ define <32 x i8> @test_pabsb(<32 x i8> %
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpabsb %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpabsb (%rdi), %ymm1 # sched: [8:1.00]
-; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pabsb:
@@ -704,7 +704,7 @@ define <8 x i32> @test_pabsd(<8 x i32> %
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpabsd %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpabsd (%rdi), %ymm1 # sched: [8:1.00]
-; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pabsd:
@@ -754,7 +754,7 @@ define <16 x i16> @test_pabsw(<16 x i16>
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpabsw %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpabsw (%rdi), %ymm1 # sched: [8:1.00]
-; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pabsw:
@@ -1367,8 +1367,8 @@ define <32 x i8> @test_palignr(<32 x i8>
 define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
 ; GENERIC-LABEL: test_pand:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpand %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpand (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
+; GENERIC-NEXT:    vpand (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
 ; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1416,8 +1416,8 @@ define <4 x i64> @test_pand(<4 x i64> %a
 define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
 ; GENERIC-LABEL: test_pandn:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpandn %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpandn (%rdi), %ymm0, %ymm1 # sched: [6:1.00]
+; GENERIC-NEXT:    vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
+; GENERIC-NEXT:    vpandn (%rdi), %ymm0, %ymm1 # sched: [7:0.50]
 ; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5038,8 +5038,8 @@ declare <4 x i64> @llvm.x86.avx2.pmulu.d
 define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
 ; GENERIC-LABEL: test_por:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpor (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
+; GENERIC-NEXT:    vpor (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
 ; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5225,7 +5225,7 @@ define <16 x i16> @test_pshufhw(<16 x i1
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [6:1.00]
-; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pshufhw:
@@ -5274,7 +5274,7 @@ define <16 x i16> @test_pshuflw(<16 x i1
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [6:1.00]
-; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pshuflw:
@@ -7064,8 +7064,8 @@ define <16 x i16> @test_punpcklwd(<16 x
 define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
 ; GENERIC-LABEL: test_pxor:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpxor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpxor (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
+; GENERIC-NEXT:    vpxor (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
 ; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;

Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=330480&r1=330479&r2=330480&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Fri Apr 20 14:16:05 2018
@@ -400,7 +400,7 @@ define <16 x i32> @vpaddd_broadcast_test
 define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
 ; GENERIC-LABEL: vpaddd_mask_test:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -418,7 +418,7 @@ define <16 x i32> @vpaddd_mask_test(<16
 define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
 ; GENERIC-LABEL: vpaddd_maskz_test:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -436,7 +436,7 @@ define <16 x i32> @vpaddd_maskz_test(<16
 define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
 ; GENERIC-LABEL: vpaddd_mask_fold_test:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -455,7 +455,7 @@ define <16 x i32> @vpaddd_mask_fold_test
 define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
 ; GENERIC-LABEL: vpaddd_mask_broadcast_test:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -473,7 +473,7 @@ define <16 x i32> @vpaddd_mask_broadcast
 define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
 ; GENERIC-LABEL: vpaddd_maskz_fold_test:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -492,7 +492,7 @@ define <16 x i32> @vpaddd_maskz_fold_tes
 define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
 ; GENERIC-LABEL: vpaddd_maskz_broadcast_test:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -657,7 +657,7 @@ define <8 x i64> @addq_broadcast(<8 x i6
 define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
 ; GENERIC-LABEL: orq_broadcast:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: orq_broadcast:
@@ -671,7 +671,7 @@ define <8 x i64> @orq_broadcast(<8 x i64
 define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
 ; GENERIC-LABEL: andd512fold:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    vandps (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    vandps (%rdi), %zmm0, %zmm0 # sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: andd512fold:
@@ -687,7 +687,7 @@ entry:
 define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
 ; GENERIC-LABEL: andqbrst:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: andqbrst:
@@ -705,7 +705,7 @@ entry:
 define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
 ; GENERIC-LABEL: test_mask_vaddps:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -725,7 +725,7 @@ define <16 x float> @test_mask_vaddps(<1
 define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
 ; GENERIC-LABEL: test_mask_vmulps:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -743,7 +743,7 @@ define <16 x float> @test_mask_vmulps(<1
 define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
 ; GENERIC-LABEL: test_mask_vminps:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -762,7 +762,7 @@ define <16 x float> @test_mask_vminps(<1
 define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i32> %mask1) nounwind readnone {
 ; GENERIC-LABEL: test_mask_vminpd:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -781,7 +781,7 @@ define <8 x double> @test_mask_vminpd(<8
 define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
 ; GENERIC-LABEL: test_mask_vmaxps:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -800,7 +800,7 @@ define <16 x float> @test_mask_vmaxps(<1
 define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i32> %mask1) nounwind readnone {
 ; GENERIC-LABEL: test_mask_vmaxpd:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -819,7 +819,7 @@ define <8 x double> @test_mask_vmaxpd(<8
 define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
 ; GENERIC-LABEL: test_mask_vsubps:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -837,7 +837,7 @@ define <16 x float> @test_mask_vsubps(<1
 define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
 ; GENERIC-LABEL: test_mask_vdivps:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [24:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -855,7 +855,7 @@ define <16 x float> @test_mask_vdivps(<1
 define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i64> %mask1) nounwind readnone {
 ; GENERIC-LABEL: test_mask_vaddpd:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmq %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -873,7 +873,7 @@ define <8 x double> @test_mask_vaddpd(<8
 define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j, <8 x i64> %mask1) nounwind readnone {
 ; GENERIC-LABEL: test_maskz_vaddpd:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -891,7 +891,7 @@ define <8 x double> @test_maskz_vaddpd(<
 define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x double>* %j,  <8 x i64> %mask1) nounwind {
 ; GENERIC-LABEL: test_mask_fold_vaddpd:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -910,7 +910,7 @@ define <8 x double> @test_mask_fold_vadd
 define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j, <8 x i64> %mask1) nounwind {
 ; GENERIC-LABEL: test_maskz_fold_vaddpd:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -947,7 +947,7 @@ define <8 x double> @test_broadcast_vadd
 define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i, double* %j, <8 x i64> %mask1) nounwind {
 ; GENERIC-LABEL: test_mask_broadcast_vaddpd:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [8:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -971,7 +971,7 @@ define <8 x double> @test_mask_broadcast
 define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
 ; GENERIC-LABEL: test_maskz_broadcast_vaddpd:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [8:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -994,7 +994,7 @@ define <8 x double> @test_maskz_broadcas
 define <16 x float>  @test_fxor(<16 x float> %a) {
 ; GENERIC-LABEL: test_fxor:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_fxor:
@@ -1009,7 +1009,7 @@ define <16 x float>  @test_fxor(<16 x fl
 define <8 x float>  @test_fxor_8f32(<8 x float> %a) {
 ; GENERIC-LABEL: test_fxor_8f32:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:1.00]
+; GENERIC-NEXT:    vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_fxor_8f32:
@@ -1023,7 +1023,7 @@ define <8 x float>  @test_fxor_8f32(<8 x
 define <8 x double> @fabs_v8f64(<8 x double> %p)
 ; GENERIC-LABEL: fabs_v8f64:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: fabs_v8f64:
@@ -1039,7 +1039,7 @@ declare <8 x double> @llvm.fabs.v8f64(<8
 define <16 x float> @fabs_v16f32(<16 x float> %p)
 ; GENERIC-LABEL: fabs_v16f32:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: fabs_v16f32:
@@ -2829,7 +2829,7 @@ define <8 x float> @ubto8f32(<8 x i32> %
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: ubto8f32:
@@ -2869,7 +2869,7 @@ define <4 x float> @ubto4f32(<4 x i32> %
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # sched: [6:1.00]
+; GENERIC-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # sched: [7:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: ubto4f32:
@@ -4394,7 +4394,7 @@ define i16 @trunc_i32_to_i1(i32 %a) {
 ; GENERIC-NEXT:    kshiftlw $1, %k0, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    andl $1, %edi # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovw %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    korw %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    korw %k1, %k0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4809,7 +4809,7 @@ define <16 x float> @test_x86_fnmsub_ps_
 ; GENERIC-LABEL: test_x86_fnmsub_ps_z:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
-; GENERIC-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
+; GENERIC-NEXT:    vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [7:1.00]
 ; GENERIC-NEXT:    vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5029,7 +5029,7 @@ define <16 x i32> @vpandd(<16 x i32> %a,
 ; GENERIC-LABEL: vpandd:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT:    vpandq %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vpandd:
@@ -5049,7 +5049,7 @@ define <16 x i32> @vpandnd(<16 x i32> %a
 ; GENERIC-LABEL: vpandnd:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT:    vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vpandnd:
@@ -5071,7 +5071,7 @@ define <16 x i32> @vpord(<16 x i32> %a,
 ; GENERIC-LABEL: vpord:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT:    vporq %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vpord:
@@ -5091,7 +5091,7 @@ define <16 x i32> @vpxord(<16 x i32> %a,
 ; GENERIC-LABEL: vpxord:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT:    vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vpxord:
@@ -5111,7 +5111,7 @@ define <8 x i64> @vpandq(<8 x i64> %a, <
 ; GENERIC-LABEL: vpandq:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT:    vpandq %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vpandq:
@@ -5130,7 +5130,7 @@ define <8 x i64> @vpandnq(<8 x i64> %a,
 ; GENERIC-LABEL: vpandnq:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT:    vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vpandnq:
@@ -5150,7 +5150,7 @@ define <8 x i64> @vporq(<8 x i64> %a, <8
 ; GENERIC-LABEL: vporq:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT:    vporq %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vporq:
@@ -5169,7 +5169,7 @@ define <8 x i64> @vpxorq(<8 x i64> %a, <
 ; GENERIC-LABEL: vpxorq:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT:    vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
+; GENERIC-NEXT:    vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vpxorq:
@@ -5187,7 +5187,7 @@ entry:
 define <64 x i8> @and_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ; GENERIC-LABEL: and_v64i8:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vandps %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vandps %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: and_v64i8:
@@ -5201,7 +5201,7 @@ define <64 x i8> @and_v64i8(<64 x i8> %a
 define <64 x i8> @andn_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ; GENERIC-LABEL: andn_v64i8:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vandnps %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vandnps %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: andn_v64i8:
@@ -5219,7 +5219,7 @@ define <64 x i8> @andn_v64i8(<64 x i8> %
 define <64 x i8> @or_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ; GENERIC-LABEL: or_v64i8:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vorps %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: or_v64i8:
@@ -5233,7 +5233,7 @@ define <64 x i8> @or_v64i8(<64 x i8> %a,
 define <64 x i8> @xor_v64i8(<64 x i8> %a, <64 x i8> %b) {
 ; GENERIC-LABEL: xor_v64i8:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vxorps %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vxorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: xor_v64i8:
@@ -5247,7 +5247,7 @@ define <64 x i8> @xor_v64i8(<64 x i8> %a
 define <32 x i16> @and_v32i16(<32 x i16> %a, <32 x i16> %b) {
 ; GENERIC-LABEL: and_v32i16:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vandps %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vandps %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: and_v32i16:
@@ -5261,7 +5261,7 @@ define <32 x i16> @and_v32i16(<32 x i16>
 define <32 x i16> @andn_v32i16(<32 x i16> %a, <32 x i16> %b) {
 ; GENERIC-LABEL: andn_v32i16:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vandnps %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vandnps %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: andn_v32i16:
@@ -5277,7 +5277,7 @@ define <32 x i16> @andn_v32i16(<32 x i16
 define <32 x i16> @or_v32i16(<32 x i16> %a, <32 x i16> %b) {
 ; GENERIC-LABEL: or_v32i16:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vorps %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: or_v32i16:
@@ -5291,7 +5291,7 @@ define <32 x i16> @or_v32i16(<32 x i16>
 define <32 x i16> @xor_v32i16(<32 x i16> %a, <32 x i16> %b) {
 ; GENERIC-LABEL: xor_v32i16:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vxorps %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
+; GENERIC-NEXT:    vxorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: xor_v32i16:
@@ -5306,7 +5306,7 @@ define <16 x float> @masked_and_v16f32(<
 ; GENERIC-LABEL: masked_and_v16f32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT:    vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5331,7 +5331,7 @@ define <16 x float> @masked_or_v16f32(<1
 ; GENERIC-LABEL: masked_or_v16f32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT:    vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5356,7 +5356,7 @@ define <16 x float> @masked_xor_v16f32(<
 ; GENERIC-LABEL: masked_xor_v16f32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT:    vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5381,7 +5381,7 @@ define <8 x double> @masked_and_v8f64(<8
 ; GENERIC-LABEL: masked_and_v8f64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT:    vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5406,7 +5406,7 @@ define <8 x double> @masked_or_v8f64(<8
 ; GENERIC-LABEL: masked_or_v8f64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT:    vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5431,7 +5431,7 @@ define <8 x double> @masked_xor_v8f64(<8
 ; GENERIC-LABEL: masked_xor_v8f64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT:    vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5456,7 +5456,7 @@ define <8 x i64> @test_mm512_mask_and_ep
 ; GENERIC-LABEL: test_mm512_mask_and_epi32:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT:    vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_mm512_mask_and_epi32:
@@ -5478,7 +5478,7 @@ define <8 x i64> @test_mm512_mask_or_epi
 ; GENERIC-LABEL: test_mm512_mask_or_epi32:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT:    vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_mm512_mask_or_epi32:
@@ -5500,7 +5500,7 @@ define <8 x i64> @test_mm512_mask_xor_ep
 ; GENERIC-LABEL: test_mm512_mask_xor_epi32:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT:    vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_mm512_mask_xor_epi32:
@@ -5522,7 +5522,7 @@ define <8 x double> @test_mm512_mask_xor
 ; GENERIC-LABEL: test_mm512_mask_xor_pd:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT:    vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_mm512_mask_xor_pd:
@@ -5544,7 +5544,7 @@ define <8 x double> @test_mm512_maskz_xo
 ; GENERIC-LABEL: test_mm512_maskz_xor_pd:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
+; GENERIC-NEXT:    vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_mm512_maskz_xor_pd:
@@ -5566,7 +5566,7 @@ define <16 x float> @test_mm512_mask_xor
 ; GENERIC-LABEL: test_mm512_mask_xor_ps:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT:    vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_mm512_mask_xor_ps:
@@ -5588,7 +5588,7 @@ define <16 x float> @test_mm512_maskz_xo
 ; GENERIC-LABEL: test_mm512_maskz_xor_ps:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
+; GENERIC-NEXT:    vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_mm512_maskz_xor_ps:
@@ -5610,7 +5610,7 @@ define <8 x double> @test_mm512_mask_or_
 ; GENERIC-LABEL: test_mm512_mask_or_pd:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT:    vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_mm512_mask_or_pd:
@@ -5632,7 +5632,7 @@ define <8 x double> @test_mm512_maskz_or
 ; GENERIC-LABEL: test_mm512_maskz_or_pd:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
+; GENERIC-NEXT:    vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_mm512_maskz_or_pd:
@@ -5654,7 +5654,7 @@ define <16 x float> @test_mm512_mask_or_
 ; GENERIC-LABEL: test_mm512_mask_or_ps:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT:    vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_mm512_mask_or_ps:
@@ -5676,7 +5676,7 @@ define <16 x float> @test_mm512_maskz_or
 ; GENERIC-LABEL: test_mm512_maskz_or_ps:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
+; GENERIC-NEXT:    vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_mm512_maskz_or_ps:
@@ -5698,7 +5698,7 @@ define <8 x double> @test_mm512_mask_and
 ; GENERIC-LABEL: test_mm512_mask_and_pd:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT:    vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_mm512_mask_and_pd:
@@ -5720,7 +5720,7 @@ define <8 x double> @test_mm512_maskz_an
 ; GENERIC-LABEL: test_mm512_maskz_and_pd:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
+; GENERIC-NEXT:    vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_mm512_maskz_and_pd:
@@ -5742,7 +5742,7 @@ define <16 x float> @test_mm512_mask_and
 ; GENERIC-LABEL: test_mm512_mask_and_ps:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT:    vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_mm512_mask_and_ps:
@@ -5764,7 +5764,7 @@ define <16 x float> @test_mm512_maskz_an
 ; GENERIC-LABEL: test_mm512_maskz_and_ps:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
+; GENERIC-NEXT:    vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_mm512_maskz_and_ps:
@@ -5786,7 +5786,7 @@ define <8 x double> @test_mm512_mask_and
 ; GENERIC-LABEL: test_mm512_mask_andnot_pd:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT:    vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_mm512_mask_andnot_pd:
@@ -5809,7 +5809,7 @@ define <8 x double> @test_mm512_maskz_an
 ; GENERIC-LABEL: test_mm512_maskz_andnot_pd:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
+; GENERIC-NEXT:    vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_mm512_maskz_andnot_pd:
@@ -5832,7 +5832,7 @@ define <16 x float> @test_mm512_mask_and
 ; GENERIC-LABEL: test_mm512_mask_andnot_ps:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
+; GENERIC-NEXT:    vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_mm512_mask_andnot_ps:
@@ -5855,7 +5855,7 @@ define <16 x float> @test_mm512_maskz_an
 ; GENERIC-LABEL: test_mm512_maskz_andnot_ps:
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
+; GENERIC-NEXT:    vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test_mm512_maskz_andnot_ps:
@@ -6349,7 +6349,7 @@ define <16 x float> @mov_test31(i8 * %ad
 define <16 x i32> @mov_test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
 ; GENERIC-LABEL: mov_test32:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vmovdqa32 (%rdi), %zmm0 {%k1} # sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6368,7 +6368,7 @@ define <16 x i32> @mov_test32(i8 * %addr
 define <16 x i32> @mov_test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
 ; GENERIC-LABEL: mov_test33:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} # sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6387,7 +6387,7 @@ define <16 x i32> @mov_test33(i8 * %addr
 define <16 x i32> @mov_test34(i8 * %addr, <16 x i32> %mask1) {
 ; GENERIC-LABEL: mov_test34:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vmovdqa32 (%rdi), %zmm0 {%k1} {z} # sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6406,7 +6406,7 @@ define <16 x i32> @mov_test34(i8 * %addr
 define <16 x i32> @mov_test35(i8 * %addr, <16 x i32> %mask1) {
 ; GENERIC-LABEL: mov_test35:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} {z} # sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6425,7 +6425,7 @@ define <16 x i32> @mov_test35(i8 * %addr
 define <8 x i64> @mov_test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
 ; GENERIC-LABEL: mov_test36:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm0 {%k1} # sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6444,7 +6444,7 @@ define <8 x i64> @mov_test36(i8 * %addr,
 define <8 x i64> @mov_test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
 ; GENERIC-LABEL: mov_test37:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k1} # sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6463,7 +6463,7 @@ define <8 x i64> @mov_test37(i8 * %addr,
 define <8 x i64> @mov_test38(i8 * %addr, <8 x i64> %mask1) {
 ; GENERIC-LABEL: mov_test38:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm0 {%k1} {z} # sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6482,7 +6482,7 @@ define <8 x i64> @mov_test38(i8 * %addr,
 define <8 x i64> @mov_test39(i8 * %addr, <8 x i64> %mask1) {
 ; GENERIC-LABEL: mov_test39:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k1} {z} # sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6744,7 +6744,7 @@ define void @mask16_mem(i16* %ptr) {
 ; GENERIC-LABEL: mask16_mem:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    kmovw (%rdi), %k0 # sched: [5:0.50]
-; GENERIC-NEXT:    knotw %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    knotw %k0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovw %k0, (%rdi) # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6766,7 +6766,7 @@ define void @mask8_mem(i8* %ptr) {
 ; GENERIC-LABEL: mask8_mem:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    kmovb (%rdi), %k0 # sched: [5:0.50]
-; GENERIC-NEXT:    knotb %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    knotb %k0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6816,9 +6816,9 @@ define i16 @mand16_mem(<16 x i1>* %x, <1
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    kmovw (%rdi), %k0 # sched: [5:0.50]
 ; GENERIC-NEXT:    kmovw (%rsi), %k1 # sched: [5:0.50]
-; GENERIC-NEXT:    kandw %k1, %k0, %k2 # sched: [1:1.00]
-; GENERIC-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT:    korw %k0, %k2, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kandw %k1, %k0, %k2 # sched: [1:0.33]
+; GENERIC-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:0.33]
+; GENERIC-NEXT:    korw %k0, %k2, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7053,7 +7053,7 @@ define <16 x i8> @vcmp_test8(<16 x i32>%
 ; GENERIC-NEXT:    cmpl %esi, %edi # sched: [1:0.33]
 ; GENERIC-NEXT:    jg .LBB386_1 # sched: [1:1.00]
 ; GENERIC-NEXT:  # %bb.2:
-; GENERIC-NEXT:    kxorw %k0, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kxorw %k0, %k0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7230,10 +7230,10 @@ define <64 x i8> @vmov_test16(i64 %x) {
 ; GENERIC-NEXT:    movb $1, %al # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %eax, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    kshiftrq $5, %k0, %k2 # sched: [1:1.00]
-; GENERIC-NEXT:    kxorq %k1, %k2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    kxorq %k1, %k2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    kshiftlq $63, %k1, %k1 # sched: [1:1.00]
 ; GENERIC-NEXT:    kshiftrq $58, %k1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    kxorq %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kxorq %k1, %k0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovm2b %k0, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7264,10 +7264,10 @@ define <64 x i8> @vmov_test17(i64 %x, i3
 ; GENERIC-NEXT:    setg %al # sched: [1:0.50]
 ; GENERIC-NEXT:    kmovd %eax, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    kshiftrq $5, %k0, %k2 # sched: [1:1.00]
-; GENERIC-NEXT:    kxorq %k1, %k2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    kxorq %k1, %k2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    kshiftlq $63, %k1, %k1 # sched: [1:1.00]
 ; GENERIC-NEXT:    kshiftrq $58, %k1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    kxorq %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kxorq %k1, %k0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovm2b %k0, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7299,14 +7299,14 @@ define <8 x i1> @vmov_test18(i8 %a, i16
 ; GENERIC-NEXT:    kshiftrw $8, %k2, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    kshiftrw $9, %k2, %k2 # sched: [1:1.00]
 ; GENERIC-NEXT:    kshiftrb $6, %k1, %k3 # sched: [1:1.00]
-; GENERIC-NEXT:    kxorb %k2, %k3, %k2 # sched: [1:1.00]
+; GENERIC-NEXT:    kxorb %k2, %k3, %k2 # sched: [1:0.33]
 ; GENERIC-NEXT:    kshiftlb $7, %k2, %k2 # sched: [1:1.00]
 ; GENERIC-NEXT:    kshiftrb $1, %k2, %k2 # sched: [1:1.00]
-; GENERIC-NEXT:    kxorb %k2, %k1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    kxorb %k2, %k1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    kshiftlb $1, %k1, %k1 # sched: [1:1.00]
 ; GENERIC-NEXT:    kshiftrb $1, %k1, %k1 # sched: [1:1.00]
 ; GENERIC-NEXT:    kshiftlb $7, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT:    korb %k0, %k1, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    korb %k0, %k1, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovm2w %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7393,8 +7393,8 @@ define void @store_v1i1(<1 x i1> %c , <1
 ; GENERIC-LABEL: store_v1i1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
-; GENERIC-NEXT:    kxnorw %k0, %k0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kxnorw %k0, %k0, %k1 # sched: [1:0.33]
+; GENERIC-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovb %k0, (%rsi) # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7415,7 +7415,7 @@ define void @store_v2i1(<2 x i1> %c , <2
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovq2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT:    knotw %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    knotw %k0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7436,7 +7436,7 @@ define void @store_v4i1(<4 x i1> %c , <4
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovd2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT:    knotw %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    knotw %k0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7457,7 +7457,7 @@ define void @store_v8i1(<8 x i1> %c , <8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT:    knotb %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    knotb %k0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovb %k0, (%rdi) # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7478,7 +7478,7 @@ define void @store_v16i1(<16 x i1> %c ,
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovb2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT:    knotw %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    knotw %k0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovw %k0, (%rdi) # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7607,7 +7607,7 @@ define void @ktest_1(<8 x double> %in, d
 ; GENERIC-NEXT:    vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [6:0.50]
 ; GENERIC-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00]
-; GENERIC-NEXT:    kortestb %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kortestb %k0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    je .LBB410_2 # sched: [1:1.00]
 ; GENERIC-NEXT:  # %bb.1: # %L1
 ; GENERIC-NEXT:    vmovapd %zmm0, (%rdi) # sched: [1:1.00]
@@ -7675,7 +7675,7 @@ define void @ktest_2(<32 x float> %in, f
 ; GENERIC-NEXT:    vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00]
 ; GENERIC-NEXT:    kunpckwd %k1, %k2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT:    kortestd %k1, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kortestd %k1, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    je .LBB411_2 # sched: [1:1.00]
 ; GENERIC-NEXT:  # %bb.1: # %L1
 ; GENERIC-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
@@ -7962,7 +7962,7 @@ define void @store_64i1(<64 x i1>* %a, <
 define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
 ; GENERIC-LABEL: test_bitcast_v8i1_zext:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovb %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    addl %eax, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
@@ -7986,7 +7986,7 @@ define i32 @test_bitcast_v8i1_zext(<16 x
 define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) {
 ; GENERIC-LABEL: test_bitcast_v16i1_zext:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovw %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    addl %eax, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
@@ -8011,7 +8011,7 @@ define i16 @test_v16i1_add(i16 %x, i16 %
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %esi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8036,7 +8036,7 @@ define i16 @test_v16i1_sub(i16 %x, i16 %
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %esi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8061,7 +8061,7 @@ define i16 @test_v16i1_mul(i16 %x, i16 %
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %esi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    kandw %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kandw %k1, %k0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8086,7 +8086,7 @@ define i8 @test_v8i1_add(i8 %x, i8 %y) {
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %esi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    kxorb %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kxorb %k1, %k0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: def $al killed $al killed $eax
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8111,7 +8111,7 @@ define i8 @test_v8i1_sub(i8 %x, i8 %y) {
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %esi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    kxorb %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kxorb %k1, %k0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: def $al killed $al killed $eax
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8136,7 +8136,7 @@ define i8 @test_v8i1_mul(i8 %x, i8 %y) {
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %esi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    kandb %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kandb %k1, %k0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: def $al killed $al killed $eax
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8218,7 +8218,7 @@ define   <16 x float> @_inreg16xfloat(fl
 define   <16 x float> @_ss16xfloat_mask(float %a, <16 x float> %i, <16 x i32> %mask1) {
 ; GENERIC-LABEL: _ss16xfloat_mask:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8239,7 +8239,7 @@ define   <16 x float> @_ss16xfloat_mask(
 define   <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) {
 ; GENERIC-LABEL: _ss16xfloat_maskz:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8274,7 +8274,7 @@ define   <16 x float> @_ss16xfloat_load(
 define   <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 x i32> %mask1) {
 ; GENERIC-LABEL: _ss16xfloat_mask_load:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8294,7 +8294,7 @@ define   <16 x float> @_ss16xfloat_mask_
 define   <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) {
 ; GENERIC-LABEL: _ss16xfloat_maskz_load:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8329,7 +8329,7 @@ define   <8 x double> @_inreg8xdouble(do
 define   <8 x double> @_sd8xdouble_mask(double %a, <8 x double> %i, <8 x i32> %mask1) {
 ; GENERIC-LABEL: _sd8xdouble_mask:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8350,7 +8350,7 @@ define   <8 x double> @_sd8xdouble_mask(
 define   <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) {
 ; GENERIC-LABEL: _sd8xdouble_maskz:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8385,7 +8385,7 @@ define   <8 x double> @_sd8xdouble_load(
 define   <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 x i32> %mask1) {
 ; GENERIC-LABEL: _sd8xdouble_mask_load:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8405,7 +8405,7 @@ define   <8 x double> @_sd8xdouble_mask_
 define   <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1) {
 ; GENERIC-LABEL: _sd8xdouble_maskz_load:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8456,7 +8456,7 @@ define <16 x i32> @test_vbroadcast() {
 ; GENERIC-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT:    knotw %k0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    knotw %k0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;

Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll?rev=330480&r1=330479&r2=330480&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll Fri Apr 20 14:16:05 2018
@@ -23,7 +23,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm3 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -45,7 +45,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -64,7 +64,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -86,7 +86,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -105,7 +105,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm3 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -127,7 +127,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -161,7 +161,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -183,7 +183,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -218,7 +218,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -239,7 +239,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm1 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -260,7 +260,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -281,7 +281,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm1 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -302,7 +302,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -323,7 +323,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -360,7 +360,7 @@ define <16 x i16> @test_masked_16xi16_pe
 ; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -381,7 +381,7 @@ define <16 x i16> @test_masked_z_16xi16_
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm1 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -417,7 +417,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -439,7 +439,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -458,7 +458,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -480,7 +480,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -499,7 +499,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -521,7 +521,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -555,7 +555,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -577,7 +577,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -612,7 +612,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -633,7 +633,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -654,7 +654,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -675,7 +675,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -696,7 +696,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -717,7 +717,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -754,7 +754,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -775,7 +775,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -811,7 +811,7 @@ define <8 x i32> @test_masked_8xi32_perm
 ; GENERIC-LABEL: test_masked_8xi32_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,2,0,6,7,2,3,6] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -833,7 +833,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
 ; GENERIC-LABEL: test_masked_z_8xi32_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [4,2,0,6,7,2,3,6] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -852,7 +852,7 @@ define <8 x i32> @test_masked_8xi32_perm
 ; GENERIC-LABEL: test_masked_8xi32_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,5,1,2,6,0,0,3] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -874,7 +874,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
 ; GENERIC-LABEL: test_masked_z_8xi32_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,5,1,2,6,0,0,3] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -893,7 +893,7 @@ define <8 x i32> @test_masked_8xi32_perm
 ; GENERIC-LABEL: test_masked_8xi32_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm3 = [3,6,5,5,1,7,3,4] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -915,7 +915,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
 ; GENERIC-LABEL: test_masked_z_8xi32_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [3,6,5,5,1,7,3,4] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -949,7 +949,7 @@ define <8 x i32> @test_masked_8xi32_perm
 ; GENERIC-LABEL: test_masked_8xi32_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm3 = [3,0,3,1,0,4,5,0] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -971,7 +971,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
 ; GENERIC-LABEL: test_masked_z_8xi32_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [3,0,3,1,0,4,5,0] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1006,7 +1006,7 @@ define <8 x i32> @test_masked_8xi32_perm
 ; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [3,7,4,3,5,2,0,5] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1027,7 +1027,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
 ; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,7,4,3,5,2,0,5] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1048,7 +1048,7 @@ define <8 x i32> @test_masked_8xi32_perm
 ; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [4,6,1,7,6,7,6,5] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1069,7 +1069,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
 ; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,6,1,7,6,7,6,5] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1090,7 +1090,7 @@ define <8 x i32> @test_masked_8xi32_perm
 ; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [6,4,6,1,6,3,6,3] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1111,7 +1111,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
 ; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm1 = [6,4,6,1,6,3,6,3] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1148,7 +1148,7 @@ define <8 x i32> @test_masked_8xi32_perm
 ; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm2 = [6,0,0,7,3,7,7,5] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1169,7 +1169,7 @@ define <8 x i32> @test_masked_z_8xi32_pe
 ; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa {{.*#+}} ymm1 = [6,0,0,7,3,7,7,5] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1205,7 +1205,7 @@ define <16 x i32> @test_masked_16xi32_pe
 ; GENERIC-LABEL: test_masked_16xi32_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -1227,7 +1227,7 @@ define <16 x i32> @test_masked_z_16xi32_
 ; GENERIC-LABEL: test_masked_z_16xi32_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1246,7 +1246,7 @@ define <16 x i32> @test_masked_16xi32_pe
 ; GENERIC-LABEL: test_masked_16xi32_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -1268,7 +1268,7 @@ define <16 x i32> @test_masked_z_16xi32_
 ; GENERIC-LABEL: test_masked_z_16xi32_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1287,7 +1287,7 @@ define <16 x i32> @test_masked_16xi32_pe
 ; GENERIC-LABEL: test_masked_16xi32_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -1309,7 +1309,7 @@ define <16 x i32> @test_masked_z_16xi32_
 ; GENERIC-LABEL: test_masked_z_16xi32_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1343,7 +1343,7 @@ define <16 x i32> @test_masked_16xi32_pe
 ; GENERIC-LABEL: test_masked_16xi32_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -1365,7 +1365,7 @@ define <16 x i32> @test_masked_z_16xi32_
 ; GENERIC-LABEL: test_masked_z_16xi32_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1400,7 +1400,7 @@ define <16 x i32> @test_masked_16xi32_pe
 ; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1421,7 +1421,7 @@ define <16 x i32> @test_masked_z_16xi32_
 ; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1442,7 +1442,7 @@ define <16 x i32> @test_masked_16xi32_pe
 ; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1463,7 +1463,7 @@ define <16 x i32> @test_masked_z_16xi32_
 ; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1484,7 +1484,7 @@ define <16 x i32> @test_masked_16xi32_pe
 ; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1505,7 +1505,7 @@ define <16 x i32> @test_masked_z_16xi32_
 ; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1542,7 +1542,7 @@ define <16 x i32> @test_masked_16xi32_pe
 ; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1563,7 +1563,7 @@ define <16 x i32> @test_masked_z_16xi32_
 ; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1596,7 +1596,7 @@ define <4 x i64> @test_4xi64_perm_mask0(
 define <4 x i64> @test_masked_4xi64_perm_mask0(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_4xi64_perm_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,0,3,1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -1616,7 +1616,7 @@ define <4 x i64> @test_masked_4xi64_perm
 define <4 x i64> @test_masked_z_4xi64_perm_mask0(<4 x i64> %vec, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,0,3,1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1633,7 +1633,7 @@ define <4 x i64> @test_masked_z_4xi64_pe
 define <4 x i64> @test_masked_4xi64_perm_mask1(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_4xi64_perm_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -1653,7 +1653,7 @@ define <4 x i64> @test_masked_4xi64_perm
 define <4 x i64> @test_masked_z_4xi64_perm_mask1(<4 x i64> %vec, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1670,7 +1670,7 @@ define <4 x i64> @test_masked_z_4xi64_pe
 define <4 x i64> @test_masked_4xi64_perm_mask2(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_4xi64_perm_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,2,2,1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -1690,7 +1690,7 @@ define <4 x i64> @test_masked_4xi64_perm
 define <4 x i64> @test_masked_z_4xi64_perm_mask2(<4 x i64> %vec, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,2,1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1720,7 +1720,7 @@ define <4 x i64> @test_4xi64_perm_mask3(
 define <4 x i64> @test_masked_4xi64_perm_mask3(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_4xi64_perm_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -1740,7 +1740,7 @@ define <4 x i64> @test_masked_4xi64_perm
 define <4 x i64> @test_masked_z_4xi64_perm_mask3(<4 x i64> %vec, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1771,7 +1771,7 @@ define <4 x i64> @test_4xi64_perm_mem_ma
 define <4 x i64> @test_masked_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1790,7 +1790,7 @@ define <4 x i64> @test_masked_4xi64_perm
 define <4 x i64> @test_masked_z_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1809,7 +1809,7 @@ define <4 x i64> @test_masked_z_4xi64_pe
 define <4 x i64> @test_masked_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1828,7 +1828,7 @@ define <4 x i64> @test_masked_4xi64_perm
 define <4 x i64> @test_masked_z_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1847,7 +1847,7 @@ define <4 x i64> @test_masked_z_4xi64_pe
 define <4 x i64> @test_masked_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1866,7 +1866,7 @@ define <4 x i64> @test_masked_4xi64_perm
 define <4 x i64> @test_masked_z_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1899,7 +1899,7 @@ define <4 x i64> @test_4xi64_perm_mem_ma
 define <4 x i64> @test_masked_4xi64_perm_mem_mask3(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1918,7 +1918,7 @@ define <4 x i64> @test_masked_4xi64_perm
 define <4 x i64> @test_masked_z_4xi64_perm_mem_mask3(<4 x i64>* %vp, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,0,1,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1953,7 +1953,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC-LABEL: test_masked_8xi64_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,4,7,6,5,5,1,6] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -1975,7 +1975,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,4,7,6,5,5,1,6] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1993,7 +1993,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 define <8 x i64> @test_masked_8xi64_perm_imm_mask1(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm1 {%k1} = zmm0[1,0,1,1,5,4,5,5] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2013,7 +2013,7 @@ define <8 x i64> @test_masked_8xi64_perm
 define <8 x i64> @test_masked_z_8xi64_perm_imm_mask1(<8 x i64> %vec, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,1,1,5,4,5,5] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2031,7 +2031,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC-LABEL: test_masked_8xi64_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [1,3,7,3,3,5,4,1] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2053,7 +2053,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,3,7,3,3,5,4,1] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2084,7 +2084,7 @@ define <8 x i64> @test_8xi64_perm_imm_ma
 define <8 x i64> @test_masked_8xi64_perm_imm_mask3(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,1,7,5,7,5] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2104,7 +2104,7 @@ define <8 x i64> @test_masked_8xi64_perm
 define <8 x i64> @test_masked_z_8xi64_perm_imm_mask3(<8 x i64> %vec, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,1,7,5,7,5] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2122,7 +2122,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC-LABEL: test_masked_8xi64_perm_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [6,3,1,1,7,4,0,3] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2144,7 +2144,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [6,3,1,1,7,4,0,3] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2162,7 +2162,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 define <8 x i64> @test_masked_8xi64_perm_imm_mask5(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask5:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm1 {%k1} = zmm0[0,0,0,0,4,4,4,4] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2182,7 +2182,7 @@ define <8 x i64> @test_masked_8xi64_perm
 define <8 x i64> @test_masked_z_8xi64_perm_imm_mask5(<8 x i64> %vec, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask5:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2215,7 +2215,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC-LABEL: test_masked_8xi64_perm_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [5,1,4,4,5,4,2,7] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2237,7 +2237,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [5,1,4,4,5,4,2,7] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2255,7 +2255,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 define <8 x i64> @test_masked_8xi64_perm_imm_mask7(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask7:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,3,3,3,7,7,7,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2275,7 +2275,7 @@ define <8 x i64> @test_masked_8xi64_perm
 define <8 x i64> @test_masked_z_8xi64_perm_imm_mask7(<8 x i64> %vec, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask7:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,3,3,7,7,7,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2309,7 +2309,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [5,1,6,5,7,3,7,3] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2330,7 +2330,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [5,1,6,5,7,3,7,3] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2350,7 +2350,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2369,7 +2369,7 @@ define <8 x i64> @test_masked_8xi64_perm
 define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2389,7 +2389,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,2,1,4,1,1,5,5] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2410,7 +2410,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,2,1,4,1,1,5,5] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2444,7 +2444,7 @@ define <8 x i64> @test_8xi64_perm_imm_me
 define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2463,7 +2463,7 @@ define <8 x i64> @test_masked_8xi64_perm
 define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2483,7 +2483,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [5,0,7,0,3,5,0,6] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2504,7 +2504,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [5,0,7,0,3,5,0,6] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2524,7 +2524,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask5:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2543,7 +2543,7 @@ define <8 x i64> @test_masked_8xi64_perm
 define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask5:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2579,7 +2579,7 @@ define <8 x i64> @test_masked_8xi64_perm
 ; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,6,3,7,3,0,3,6] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2600,7 +2600,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,6,3,7,3,0,3,6] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2620,7 +2620,7 @@ define <8 x i64> @test_masked_z_8xi64_pe
 define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask7:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2639,7 +2639,7 @@ define <8 x i64> @test_masked_8xi64_perm
 define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask7:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2674,7 +2674,7 @@ define <8 x float> @test_masked_8xfloat_
 ; GENERIC-LABEL: test_masked_8xfloat_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm3 = [3,4,2,4,1,2,3,4] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2696,7 +2696,7 @@ define <8 x float> @test_masked_z_8xfloa
 ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm2 = [3,4,2,4,1,2,3,4] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2715,7 +2715,7 @@ define <8 x float> @test_masked_8xfloat_
 ; GENERIC-LABEL: test_masked_8xfloat_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm3 = [4,2,1,0,6,0,5,1] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2737,7 +2737,7 @@ define <8 x float> @test_masked_z_8xfloa
 ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm2 = [4,2,1,0,6,0,5,1] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2756,7 +2756,7 @@ define <8 x float> @test_masked_8xfloat_
 ; GENERIC-LABEL: test_masked_8xfloat_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm3 = [2,5,5,5,4,6,0,5] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2778,7 +2778,7 @@ define <8 x float> @test_masked_z_8xfloa
 ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm2 = [2,5,5,5,4,6,0,5] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2812,7 +2812,7 @@ define <8 x float> @test_masked_8xfloat_
 ; GENERIC-LABEL: test_masked_8xfloat_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm3 = [0,5,2,5,5,5,1,6] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2834,7 +2834,7 @@ define <8 x float> @test_masked_z_8xfloa
 ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm2 = [0,5,2,5,5,5,1,6] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2869,7 +2869,7 @@ define <8 x float> @test_masked_8xfloat_
 ; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm2 = [5,2,1,6,4,2,4,0] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2890,7 +2890,7 @@ define <8 x float> @test_masked_z_8xfloa
 ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm1 = [5,2,1,6,4,2,4,0] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2911,7 +2911,7 @@ define <8 x float> @test_masked_8xfloat_
 ; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm2 = [1,3,7,4,0,6,6,6] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2932,7 +2932,7 @@ define <8 x float> @test_masked_z_8xfloa
 ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm1 = [1,3,7,4,0,6,6,6] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2953,7 +2953,7 @@ define <8 x float> @test_masked_8xfloat_
 ; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm2 = [4,5,1,5,6,6,2,4] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2974,7 +2974,7 @@ define <8 x float> @test_masked_z_8xfloa
 ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm1 = [4,5,1,5,6,6,2,4] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3011,7 +3011,7 @@ define <8 x float> @test_masked_8xfloat_
 ; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm2 = [5,7,0,6,4,2,3,0] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3032,7 +3032,7 @@ define <8 x float> @test_masked_z_8xfloa
 ; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} ymm1 = [5,7,0,6,4,2,3,0] sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3068,7 +3068,7 @@ define <16 x float> @test_masked_16xfloa
 ; GENERIC-LABEL: test_masked_16xfloat_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm3 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -3090,7 +3090,7 @@ define <16 x float> @test_masked_z_16xfl
 ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm2 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3109,7 +3109,7 @@ define <16 x float> @test_masked_16xfloa
 ; GENERIC-LABEL: test_masked_16xfloat_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm3 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -3131,7 +3131,7 @@ define <16 x float> @test_masked_z_16xfl
 ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm2 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3150,7 +3150,7 @@ define <16 x float> @test_masked_16xfloa
 ; GENERIC-LABEL: test_masked_16xfloat_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm3 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -3172,7 +3172,7 @@ define <16 x float> @test_masked_z_16xfl
 ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm2 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3206,7 +3206,7 @@ define <16 x float> @test_masked_16xfloa
 ; GENERIC-LABEL: test_masked_16xfloat_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm3 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -3228,7 +3228,7 @@ define <16 x float> @test_masked_z_16xfl
 ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm2 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3263,7 +3263,7 @@ define <16 x float> @test_masked_16xfloa
 ; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm2 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3284,7 +3284,7 @@ define <16 x float> @test_masked_z_16xfl
 ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm1 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3305,7 +3305,7 @@ define <16 x float> @test_masked_16xfloa
 ; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm2 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3326,7 +3326,7 @@ define <16 x float> @test_masked_z_16xfl
 ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm1 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3347,7 +3347,7 @@ define <16 x float> @test_masked_16xfloa
 ; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm2 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3368,7 +3368,7 @@ define <16 x float> @test_masked_z_16xfl
 ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm1 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3405,7 +3405,7 @@ define <16 x float> @test_masked_16xfloa
 ; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm2 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3426,7 +3426,7 @@ define <16 x float> @test_masked_z_16xfl
 ; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovaps {{.*#+}} zmm1 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3459,7 +3459,7 @@ define <4 x double> @test_4xdouble_perm_
 define <4 x double> @test_masked_4xdouble_perm_mask0(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_4xdouble_perm_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,2] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -3479,7 +3479,7 @@ define <4 x double> @test_masked_4xdoubl
 define <4 x double> @test_masked_z_4xdouble_perm_mask0(<4 x double> %vec, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,2] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3496,7 +3496,7 @@ define <4 x double> @test_masked_z_4xdou
 define <4 x double> @test_masked_4xdouble_perm_mask1(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_4xdouble_perm_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,0,0,0] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -3516,7 +3516,7 @@ define <4 x double> @test_masked_4xdoubl
 define <4 x double> @test_masked_z_4xdouble_perm_mask1(<4 x double> %vec, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,0,0] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3533,7 +3533,7 @@ define <4 x double> @test_masked_z_4xdou
 define <4 x double> @test_masked_4xdouble_perm_mask2(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_4xdouble_perm_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -3553,7 +3553,7 @@ define <4 x double> @test_masked_4xdoubl
 define <4 x double> @test_masked_z_4xdouble_perm_mask2(<4 x double> %vec, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3583,7 +3583,7 @@ define <4 x double> @test_4xdouble_perm_
 define <4 x double> @test_masked_4xdouble_perm_mask3(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_4xdouble_perm_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,2] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -3603,7 +3603,7 @@ define <4 x double> @test_masked_4xdoubl
 define <4 x double> @test_masked_z_4xdouble_perm_mask3(<4 x double> %vec, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,2] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3634,7 +3634,7 @@ define <4 x double> @test_4xdouble_perm_
 define <4 x double> @test_masked_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3653,7 +3653,7 @@ define <4 x double> @test_masked_4xdoubl
 define <4 x double> @test_masked_z_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3672,7 +3672,7 @@ define <4 x double> @test_masked_z_4xdou
 define <4 x double> @test_masked_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3691,7 +3691,7 @@ define <4 x double> @test_masked_4xdoubl
 define <4 x double> @test_masked_z_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3710,7 +3710,7 @@ define <4 x double> @test_masked_z_4xdou
 define <4 x double> @test_masked_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3729,7 +3729,7 @@ define <4 x double> @test_masked_4xdoubl
 define <4 x double> @test_masked_z_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3762,7 +3762,7 @@ define <4 x double> @test_4xdouble_perm_
 define <4 x double> @test_masked_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3781,7 +3781,7 @@ define <4 x double> @test_masked_4xdoubl
 define <4 x double> @test_masked_z_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3816,7 +3816,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC-LABEL: test_masked_8xdouble_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm3 = [5,7,4,2,7,4,3,4] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -3838,7 +3838,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm2 = [5,7,4,2,7,4,3,4] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3856,7 +3856,7 @@ define <8 x double> @test_masked_z_8xdou
 define <8 x double> @test_masked_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,2,7,4,4,6] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -3876,7 +3876,7 @@ define <8 x double> @test_masked_8xdoubl
 define <8 x double> @test_masked_z_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,2,7,4,4,6] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3894,7 +3894,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC-LABEL: test_masked_8xdouble_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm3 = [7,5,5,5,3,5,1,7] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -3916,7 +3916,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm2 = [7,5,5,5,3,5,1,7] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3947,7 +3947,7 @@ define <8 x double> @test_8xdouble_perm_
 define <8 x double> @test_masked_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -3967,7 +3967,7 @@ define <8 x double> @test_masked_8xdoubl
 define <8 x double> @test_masked_z_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3985,7 +3985,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC-LABEL: test_masked_8xdouble_perm_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm3 = [3,5,3,4,6,5,7,1] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4007,7 +4007,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm2 = [3,5,3,4,6,5,7,1] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4025,7 +4025,7 @@ define <8 x double> @test_masked_z_8xdou
 define <8 x double> @test_masked_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask5:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,3,2,3,7,7,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4045,7 +4045,7 @@ define <8 x double> @test_masked_8xdoubl
 define <8 x double> @test_masked_z_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask5:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,2,3,7,7,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4078,7 +4078,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC-LABEL: test_masked_8xdouble_perm_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm3 = [2,7,6,4,0,0,0,2] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4100,7 +4100,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm2 = [2,7,6,4,0,0,0,2] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4118,7 +4118,7 @@ define <8 x double> @test_masked_z_8xdou
 define <8 x double> @test_masked_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask7:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,2,7,5,7,6] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4138,7 +4138,7 @@ define <8 x double> @test_masked_8xdoubl
 define <8 x double> @test_masked_z_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask7:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,2,7,5,7,6] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4172,7 +4172,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm2 = [0,3,4,0,4,2,0,1] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4193,7 +4193,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm1 = [0,3,4,0,4,2,0,1] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4213,7 +4213,7 @@ define <8 x double> @test_masked_z_8xdou
 define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4232,7 +4232,7 @@ define <8 x double> @test_masked_8xdoubl
 define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4252,7 +4252,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm2 = [6,7,2,7,7,6,2,5] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4273,7 +4273,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm1 = [6,7,2,7,7,6,2,5] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4307,7 +4307,7 @@ define <8 x double> @test_8xdouble_perm_
 define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4326,7 +4326,7 @@ define <8 x double> @test_masked_8xdoubl
 define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4346,7 +4346,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm2 = [1,1,3,5,6,0,6,0] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4367,7 +4367,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask4:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm1 = [1,1,3,5,6,0,6,0] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4387,7 +4387,7 @@ define <8 x double> @test_masked_z_8xdou
 define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask5:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4406,7 +4406,7 @@ define <8 x double> @test_masked_8xdoubl
 define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4442,7 +4442,7 @@ define <8 x double> @test_masked_8xdoubl
 ; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm2 = [2,4,0,4,6,1,2,5] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4463,7 +4463,7 @@ define <8 x double> @test_masked_z_8xdou
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask6:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovapd {{.*#+}} zmm1 = [2,4,0,4,6,1,2,5] sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4483,7 +4483,7 @@ define <8 x double> @test_masked_z_8xdou
 define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask7:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4502,7 +4502,7 @@ define <8 x double> @test_masked_8xdoubl
 define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4534,7 +4534,7 @@ define <16 x i8> @test_16xi8_perm_mask0(
 define <16 x i8> @test_masked_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) {
 ; GENERIC-LABEL: test_masked_16xi8_perm_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [6:0.50]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4554,7 +4554,7 @@ define <16 x i8> @test_masked_16xi8_perm
 define <16 x i8> @test_masked_z_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %mask) {
 ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4571,7 +4571,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
 define <16 x i8> @test_masked_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) {
 ; GENERIC-LABEL: test_masked_16xi8_perm_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [6:0.50]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4591,7 +4591,7 @@ define <16 x i8> @test_masked_16xi8_perm
 define <16 x i8> @test_masked_z_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %mask) {
 ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4608,7 +4608,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
 define <16 x i8> @test_masked_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) {
 ; GENERIC-LABEL: test_masked_16xi8_perm_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [6:0.50]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4628,7 +4628,7 @@ define <16 x i8> @test_masked_16xi8_perm
 define <16 x i8> @test_masked_z_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %mask) {
 ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4658,7 +4658,7 @@ define <16 x i8> @test_16xi8_perm_mask3(
 define <16 x i8> @test_masked_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) {
 ; GENERIC-LABEL: test_masked_16xi8_perm_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmb %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [6:0.50]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4678,7 +4678,7 @@ define <16 x i8> @test_masked_16xi8_perm
 define <16 x i8> @test_masked_z_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %mask) {
 ; GENERIC-LABEL: test_masked_z_16xi8_perm_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4712,7 +4712,7 @@ define <16 x i8> @test_masked_16xi8_perm
 ; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %xmm2 # sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4733,7 +4733,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
 ; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %xmm1 # sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4754,7 +4754,7 @@ define <16 x i8> @test_masked_16xi8_perm
 ; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %xmm2 # sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4775,7 +4775,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
 ; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %xmm1 # sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4796,7 +4796,7 @@ define <16 x i8> @test_masked_16xi8_perm
 ; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %xmm2 # sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} = xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4817,7 +4817,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
 ; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %xmm1 # sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4854,7 +4854,7 @@ define <16 x i8> @test_masked_16xi8_perm
 ; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %xmm2 # sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmb %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4875,7 +4875,7 @@ define <16 x i8> @test_masked_z_16xi8_pe
 ; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %xmm1 # sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmb %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4908,7 +4908,7 @@ define <32 x i8> @test_32xi8_perm_mask0(
 define <32 x i8> @test_masked_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) {
 ; GENERIC-LABEL: test_masked_32xi8_perm_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [6:0.50]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4928,7 +4928,7 @@ define <32 x i8> @test_masked_32xi8_perm
 define <32 x i8> @test_masked_z_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %mask) {
 ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4945,7 +4945,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
 define <32 x i8> @test_masked_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) {
 ; GENERIC-LABEL: test_masked_32xi8_perm_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [6:0.50]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4965,7 +4965,7 @@ define <32 x i8> @test_masked_32xi8_perm
 define <32 x i8> @test_masked_z_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %mask) {
 ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4982,7 +4982,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
 define <32 x i8> @test_masked_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) {
 ; GENERIC-LABEL: test_masked_32xi8_perm_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [6:0.50]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5002,7 +5002,7 @@ define <32 x i8> @test_masked_32xi8_perm
 define <32 x i8> @test_masked_z_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %mask) {
 ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5032,7 +5032,7 @@ define <32 x i8> @test_32xi8_perm_mask3(
 define <32 x i8> @test_masked_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) {
 ; GENERIC-LABEL: test_masked_32xi8_perm_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmb %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [6:0.50]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5052,7 +5052,7 @@ define <32 x i8> @test_masked_32xi8_perm
 define <32 x i8> @test_masked_z_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %mask) {
 ; GENERIC-LABEL: test_masked_z_32xi8_perm_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5086,7 +5086,7 @@ define <32 x i8> @test_masked_32xi8_perm
 ; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm2 # sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5107,7 +5107,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
 ; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm1 # sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5128,7 +5128,7 @@ define <32 x i8> @test_masked_32xi8_perm
 ; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm2 # sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5149,7 +5149,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
 ; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm1 # sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5170,7 +5170,7 @@ define <32 x i8> @test_masked_32xi8_perm
 ; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm2 # sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5191,7 +5191,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
 ; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm1 # sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5228,7 +5228,7 @@ define <32 x i8> @test_masked_32xi8_perm
 ; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm2 # sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmb %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5249,7 +5249,7 @@ define <32 x i8> @test_masked_z_32xi8_pe
 ; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa (%rdi), %ymm1 # sched: [7:0.50]
-; GENERIC-NEXT:    vptestnmb %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5282,7 +5282,7 @@ define <64 x i8> @test_64xi8_perm_mask0(
 define <64 x i8> @test_masked_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) {
 ; GENERIC-LABEL: test_masked_64xi8_perm_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [6:0.50]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5302,7 +5302,7 @@ define <64 x i8> @test_masked_64xi8_perm
 define <64 x i8> @test_masked_z_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %mask) {
 ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5319,7 +5319,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
 define <64 x i8> @test_masked_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) {
 ; GENERIC-LABEL: test_masked_64xi8_perm_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [6:0.50]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5339,7 +5339,7 @@ define <64 x i8> @test_masked_64xi8_perm
 define <64 x i8> @test_masked_z_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %mask) {
 ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5356,7 +5356,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
 define <64 x i8> @test_masked_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) {
 ; GENERIC-LABEL: test_masked_64xi8_perm_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [6:0.50]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5376,7 +5376,7 @@ define <64 x i8> @test_masked_64xi8_perm
 define <64 x i8> @test_masked_z_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %mask) {
 ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5406,7 +5406,7 @@ define <64 x i8> @test_64xi8_perm_mask3(
 define <64 x i8> @test_masked_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) {
 ; GENERIC-LABEL: test_masked_64xi8_perm_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmb %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [6:0.50]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5426,7 +5426,7 @@ define <64 x i8> @test_masked_64xi8_perm
 define <64 x i8> @test_masked_z_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %mask) {
 ; GENERIC-LABEL: test_masked_z_64xi8_perm_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5460,7 +5460,7 @@ define <64 x i8> @test_masked_64xi8_perm
 ; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5481,7 +5481,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
 ; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5502,7 +5502,7 @@ define <64 x i8> @test_masked_64xi8_perm
 ; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5523,7 +5523,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
 ; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5544,7 +5544,7 @@ define <64 x i8> @test_masked_64xi8_perm
 ; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5565,7 +5565,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
 ; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5602,7 +5602,7 @@ define <64 x i8> @test_masked_64xi8_perm
 ; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm2 # sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmb %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5623,7 +5623,7 @@ define <64 x i8> @test_masked_z_64xi8_pe
 ; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm1 # sched: [6:0.50]
-; GENERIC-NEXT:    vptestnmb %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [6:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5656,7 +5656,7 @@ define <8 x i16> @test_8xi16_perm_high_m
 define <8 x i16> @test_masked_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5676,7 +5676,7 @@ define <8 x i16> @test_masked_8xi16_perm
 define <8 x i16> @test_masked_z_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5693,7 +5693,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 define <8 x i16> @test_masked_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5713,7 +5713,7 @@ define <8 x i16> @test_masked_8xi16_perm
 define <8 x i16> @test_masked_z_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5730,7 +5730,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 define <8 x i16> @test_masked_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5750,7 +5750,7 @@ define <8 x i16> @test_masked_8xi16_perm
 define <8 x i16> @test_masked_z_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,4,4,5] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5780,7 +5780,7 @@ define <8 x i16> @test_8xi16_perm_low_ma
 define <8 x i16> @test_masked_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5800,7 +5800,7 @@ define <8 x i16> @test_masked_8xi16_perm
 define <8 x i16> @test_masked_z_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5817,7 +5817,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 define <8 x i16> @test_masked_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask4:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5837,7 +5837,7 @@ define <8 x i16> @test_masked_8xi16_perm
 define <8 x i16> @test_masked_z_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask4:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5854,7 +5854,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 define <8 x i16> @test_masked_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask5:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5874,7 +5874,7 @@ define <8 x i16> @test_masked_8xi16_perm
 define <8 x i16> @test_masked_z_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask5:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5904,7 +5904,7 @@ define <8 x i16> @test_8xi16_perm_high_m
 define <8 x i16> @test_masked_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mask6:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5924,7 +5924,7 @@ define <8 x i16> @test_masked_8xi16_perm
 define <8 x i16> @test_masked_z_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask6:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5941,7 +5941,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 define <8 x i16> @test_masked_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mask7:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5961,7 +5961,7 @@ define <8 x i16> @test_masked_8xi16_perm
 define <8 x i16> @test_masked_z_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask7:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5992,7 +5992,7 @@ define <8 x i16> @test_8xi16_perm_high_m
 define <8 x i16> @test_masked_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6011,7 +6011,7 @@ define <8 x i16> @test_masked_8xi16_perm
 define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6030,7 +6030,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 define <8 x i16> @test_masked_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6049,7 +6049,7 @@ define <8 x i16> @test_masked_8xi16_perm
 define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6068,7 +6068,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 define <8 x i16> @test_masked_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6087,7 +6087,7 @@ define <8 x i16> @test_masked_8xi16_perm
 define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6120,7 +6120,7 @@ define <8 x i16> @test_8xi16_perm_low_me
 define <8 x i16> @test_masked_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6139,7 +6139,7 @@ define <8 x i16> @test_masked_8xi16_perm
 define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6158,7 +6158,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 define <8 x i16> @test_masked_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask4:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6177,7 +6177,7 @@ define <8 x i16> @test_masked_8xi16_perm
 define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask4:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6196,7 +6196,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 define <8 x i16> @test_masked_8xi16_perm_low_mem_mask5(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask5:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6215,7 +6215,7 @@ define <8 x i16> @test_masked_8xi16_perm
 define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask5(<8 x i16>* %vp, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask5:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6248,7 +6248,7 @@ define <8 x i16> @test_8xi16_perm_high_m
 define <8 x i16> @test_masked_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask6:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6267,7 +6267,7 @@ define <8 x i16> @test_masked_8xi16_perm
 define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask6:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6286,7 +6286,7 @@ define <8 x i16> @test_masked_z_8xi16_pe
 define <8 x i16> @test_masked_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask7:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6305,7 +6305,7 @@ define <8 x i16> @test_masked_8xi16_perm
 define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask7:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6337,7 +6337,7 @@ define <16 x i16> @test_16xi16_perm_high
 define <16 x i16> @test_masked_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -6357,7 +6357,7 @@ define <16 x i16> @test_masked_16xi16_pe
 define <16 x i16> @test_masked_z_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6374,7 +6374,7 @@ define <16 x i16> @test_masked_z_16xi16_
 define <16 x i16> @test_masked_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -6394,7 +6394,7 @@ define <16 x i16> @test_masked_16xi16_pe
 define <16 x i16> @test_masked_z_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6411,7 +6411,7 @@ define <16 x i16> @test_masked_z_16xi16_
 define <16 x i16> @test_masked_16xi16_perm_high_mask2(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -6431,7 +6431,7 @@ define <16 x i16> @test_masked_16xi16_pe
 define <16 x i16> @test_masked_z_16xi16_perm_high_mask2(<16 x i16> %vec, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6461,7 +6461,7 @@ define <16 x i16> @test_16xi16_perm_low_
 define <16 x i16> @test_masked_16xi16_perm_low_mask3(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -6481,7 +6481,7 @@ define <16 x i16> @test_masked_16xi16_pe
 define <16 x i16> @test_masked_z_16xi16_perm_low_mask3(<16 x i16> %vec, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6498,7 +6498,7 @@ define <16 x i16> @test_masked_z_16xi16_
 define <16 x i16> @test_masked_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mask4:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -6518,7 +6518,7 @@ define <16 x i16> @test_masked_16xi16_pe
 define <16 x i16> @test_masked_z_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask4:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6535,7 +6535,7 @@ define <16 x i16> @test_masked_z_16xi16_
 define <16 x i16> @test_masked_16xi16_perm_low_mask5(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mask5:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -6555,7 +6555,7 @@ define <16 x i16> @test_masked_16xi16_pe
 define <16 x i16> @test_masked_z_16xi16_perm_low_mask5(<16 x i16> %vec, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask5:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6585,7 +6585,7 @@ define <16 x i16> @test_16xi16_perm_high
 define <16 x i16> @test_masked_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mask6:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -6605,7 +6605,7 @@ define <16 x i16> @test_masked_16xi16_pe
 define <16 x i16> @test_masked_z_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask6:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6622,7 +6622,7 @@ define <16 x i16> @test_masked_z_16xi16_
 define <16 x i16> @test_masked_16xi16_perm_low_mask7(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mask7:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -6642,7 +6642,7 @@ define <16 x i16> @test_masked_16xi16_pe
 define <16 x i16> @test_masked_z_16xi16_perm_low_mask7(<16 x i16> %vec, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask7:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6673,7 +6673,7 @@ define <16 x i16> @test_16xi16_perm_high
 define <16 x i16> @test_masked_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6692,7 +6692,7 @@ define <16 x i16> @test_masked_16xi16_pe
 define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6711,7 +6711,7 @@ define <16 x i16> @test_masked_z_16xi16_
 define <16 x i16> @test_masked_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6730,7 +6730,7 @@ define <16 x i16> @test_masked_16xi16_pe
 define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6749,7 +6749,7 @@ define <16 x i16> @test_masked_z_16xi16_
 define <16 x i16> @test_masked_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6768,7 +6768,7 @@ define <16 x i16> @test_masked_16xi16_pe
 define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6801,7 +6801,7 @@ define <16 x i16> @test_16xi16_perm_low_
 define <16 x i16> @test_masked_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6820,7 +6820,7 @@ define <16 x i16> @test_masked_16xi16_pe
 define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6839,7 +6839,7 @@ define <16 x i16> @test_masked_z_16xi16_
 define <16 x i16> @test_masked_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask4:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6858,7 +6858,7 @@ define <16 x i16> @test_masked_16xi16_pe
 define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask4:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6877,7 +6877,7 @@ define <16 x i16> @test_masked_z_16xi16_
 define <16 x i16> @test_masked_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask5:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6896,7 +6896,7 @@ define <16 x i16> @test_masked_16xi16_pe
 define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask5:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6929,7 +6929,7 @@ define <16 x i16> @test_16xi16_perm_high
 define <16 x i16> @test_masked_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask6:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6948,7 +6948,7 @@ define <16 x i16> @test_masked_16xi16_pe
 define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask6:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6967,7 +6967,7 @@ define <16 x i16> @test_masked_z_16xi16_
 define <16 x i16> @test_masked_16xi16_perm_low_mem_mask7(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask7:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6986,7 +6986,7 @@ define <16 x i16> @test_masked_16xi16_pe
 define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask7(<16 x i16>* %vp, <16 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask7:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7018,7 +7018,7 @@ define <32 x i16> @test_32xi16_perm_high
 define <32 x i16> @test_masked_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7038,7 +7038,7 @@ define <32 x i16> @test_masked_32xi16_pe
 define <32 x i16> @test_masked_z_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7055,7 +7055,7 @@ define <32 x i16> @test_masked_z_32xi16_
 define <32 x i16> @test_masked_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7075,7 +7075,7 @@ define <32 x i16> @test_masked_32xi16_pe
 define <32 x i16> @test_masked_z_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7092,7 +7092,7 @@ define <32 x i16> @test_masked_z_32xi16_
 define <32 x i16> @test_masked_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7112,7 +7112,7 @@ define <32 x i16> @test_masked_32xi16_pe
 define <32 x i16> @test_masked_z_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7142,7 +7142,7 @@ define <32 x i16> @test_32xi16_perm_low_
 define <32 x i16> @test_masked_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7162,7 +7162,7 @@ define <32 x i16> @test_masked_32xi16_pe
 define <32 x i16> @test_masked_z_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7179,7 +7179,7 @@ define <32 x i16> @test_masked_z_32xi16_
 define <32 x i16> @test_masked_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask4:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7199,7 +7199,7 @@ define <32 x i16> @test_masked_32xi16_pe
 define <32 x i16> @test_masked_z_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask4:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7216,7 +7216,7 @@ define <32 x i16> @test_masked_z_32xi16_
 define <32 x i16> @test_masked_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask5:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7236,7 +7236,7 @@ define <32 x i16> @test_masked_32xi16_pe
 define <32 x i16> @test_masked_z_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask5:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7266,7 +7266,7 @@ define <32 x i16> @test_32xi16_perm_high
 define <32 x i16> @test_masked_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_32xi16_perm_high_mask6:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7286,7 +7286,7 @@ define <32 x i16> @test_masked_32xi16_pe
 define <32 x i16> @test_masked_z_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask6:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7303,7 +7303,7 @@ define <32 x i16> @test_masked_z_32xi16_
 define <32 x i16> @test_masked_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_32xi16_perm_low_mask7:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7323,7 +7323,7 @@ define <32 x i16> @test_masked_32xi16_pe
 define <32 x i16> @test_masked_z_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask7:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7354,7 +7354,7 @@ define <32 x i16> @test_32xi16_perm_high
 define <32 x i16> @test_masked_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7373,7 +7373,7 @@ define <32 x i16> @test_masked_32xi16_pe
 define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7392,7 +7392,7 @@ define <32 x i16> @test_masked_z_32xi16_
 define <32 x i16> @test_masked_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7411,7 +7411,7 @@ define <32 x i16> @test_masked_32xi16_pe
 define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7430,7 +7430,7 @@ define <32 x i16> @test_masked_z_32xi16_
 define <32 x i16> @test_masked_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7449,7 +7449,7 @@ define <32 x i16> @test_masked_32xi16_pe
 define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7482,7 +7482,7 @@ define <32 x i16> @test_32xi16_perm_low_
 define <32 x i16> @test_masked_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7501,7 +7501,7 @@ define <32 x i16> @test_masked_32xi16_pe
 define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7520,7 +7520,7 @@ define <32 x i16> @test_masked_z_32xi16_
 define <32 x i16> @test_masked_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask4:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7539,7 +7539,7 @@ define <32 x i16> @test_masked_32xi16_pe
 define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask4:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7559,7 +7559,7 @@ define <32 x i16> @test_masked_32xi16_pe
 ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm2 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [6:1.00]
-; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vmovdqu16 %zmm2, %zmm0 {%k1} # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7580,7 +7580,7 @@ define <32 x i16> @test_masked_z_32xi16_
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask5:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm1 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [6:1.00]
-; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vmovdqu16 %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7614,7 +7614,7 @@ define <32 x i16> @test_32xi16_perm_high
 define <32 x i16> @test_masked_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask6:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7633,7 +7633,7 @@ define <32 x i16> @test_masked_32xi16_pe
 define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask6:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7652,7 +7652,7 @@ define <32 x i16> @test_masked_z_32xi16_
 define <32 x i16> @test_masked_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask7:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7671,7 +7671,7 @@ define <32 x i16> @test_masked_32xi16_pe
 define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 x i16> %mask) {
 ; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask7:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7703,7 +7703,7 @@ define <4 x i32> @test_4xi32_perm_mask0(
 define <4 x i32> @test_masked_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_masked_4xi32_perm_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7723,7 +7723,7 @@ define <4 x i32> @test_masked_4xi32_perm
 define <4 x i32> @test_masked_z_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7740,7 +7740,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
 define <4 x i32> @test_masked_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_masked_4xi32_perm_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7760,7 +7760,7 @@ define <4 x i32> @test_masked_4xi32_perm
 define <4 x i32> @test_masked_z_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7777,7 +7777,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
 define <4 x i32> @test_masked_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_masked_4xi32_perm_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7797,7 +7797,7 @@ define <4 x i32> @test_masked_4xi32_perm
 define <4 x i32> @test_masked_z_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7827,7 +7827,7 @@ define <4 x i32> @test_4xi32_perm_mask3(
 define <4 x i32> @test_masked_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_masked_4xi32_perm_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %xmm1, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7847,7 +7847,7 @@ define <4 x i32> @test_masked_4xi32_perm
 define <4 x i32> @test_masked_z_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7878,7 +7878,7 @@ define <4 x i32> @test_4xi32_perm_mem_ma
 define <4 x i32> @test_masked_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7897,7 +7897,7 @@ define <4 x i32> @test_masked_4xi32_perm
 define <4 x i32> @test_masked_z_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7916,7 +7916,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
 define <4 x i32> @test_masked_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7935,7 +7935,7 @@ define <4 x i32> @test_masked_4xi32_perm
 define <4 x i32> @test_masked_z_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7954,7 +7954,7 @@ define <4 x i32> @test_masked_z_4xi32_pe
 define <4 x i32> @test_masked_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7973,7 +7973,7 @@ define <4 x i32> @test_masked_4xi32_perm
 define <4 x i32> @test_masked_z_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8006,7 +8006,7 @@ define <4 x i32> @test_4xi32_perm_mem_ma
 define <4 x i32> @test_masked_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8025,7 +8025,7 @@ define <4 x i32> @test_masked_4xi32_perm
 define <4 x i32> @test_masked_z_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8057,7 +8057,7 @@ define <8 x i32> @test2_8xi32_perm_mask0
 define <8 x i32> @test2_masked_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_8xi32_perm_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm1 {%k1} = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8077,7 +8077,7 @@ define <8 x i32> @test2_masked_8xi32_per
 define <8 x i32> @test2_masked_z_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8094,7 +8094,7 @@ define <8 x i32> @test2_masked_z_8xi32_p
 define <8 x i32> @test2_masked_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_8xi32_perm_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8114,7 +8114,7 @@ define <8 x i32> @test2_masked_8xi32_per
 define <8 x i32> @test2_masked_z_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8131,7 +8131,7 @@ define <8 x i32> @test2_masked_z_8xi32_p
 define <8 x i32> @test2_masked_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_8xi32_perm_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8151,7 +8151,7 @@ define <8 x i32> @test2_masked_8xi32_per
 define <8 x i32> @test2_masked_z_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8181,7 +8181,7 @@ define <8 x i32> @test2_8xi32_perm_mask3
 define <8 x i32> @test2_masked_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_8xi32_perm_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8201,7 +8201,7 @@ define <8 x i32> @test2_masked_8xi32_per
 define <8 x i32> @test2_masked_z_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8232,7 +8232,7 @@ define <8 x i32> @test2_8xi32_perm_mem_m
 define <8 x i32> @test2_masked_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} = mem[1,0,2,0,5,4,6,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8251,7 +8251,7 @@ define <8 x i32> @test2_masked_8xi32_per
 define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,2,0,5,4,6,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8270,7 +8270,7 @@ define <8 x i32> @test2_masked_z_8xi32_p
 define <8 x i32> @test2_masked_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8289,7 +8289,7 @@ define <8 x i32> @test2_masked_8xi32_per
 define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8308,7 +8308,7 @@ define <8 x i32> @test2_masked_z_8xi32_p
 define <8 x i32> @test2_masked_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,3,1,7,6,7,5] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8327,7 +8327,7 @@ define <8 x i32> @test2_masked_8xi32_per
 define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,1,7,6,7,5] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8360,7 +8360,7 @@ define <8 x i32> @test2_8xi32_perm_mem_m
 define <8 x i32> @test2_masked_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,0,0,7,6,4,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8379,7 +8379,7 @@ define <8 x i32> @test2_masked_8xi32_per
 define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,0,0,7,6,4,4] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8411,7 +8411,7 @@ define <16 x i32> @test2_16xi32_perm_mas
 define <16 x i32> @test2_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_16xi32_perm_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8431,7 +8431,7 @@ define <16 x i32> @test2_masked_16xi32_p
 define <16 x i32> @test2_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8448,7 +8448,7 @@ define <16 x i32> @test2_masked_z_16xi32
 define <16 x i32> @test2_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_16xi32_perm_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8468,7 +8468,7 @@ define <16 x i32> @test2_masked_16xi32_p
 define <16 x i32> @test2_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8485,7 +8485,7 @@ define <16 x i32> @test2_masked_z_16xi32
 define <16 x i32> @test2_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_16xi32_perm_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8505,7 +8505,7 @@ define <16 x i32> @test2_masked_16xi32_p
 define <16 x i32> @test2_masked_z_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8535,7 +8535,7 @@ define <16 x i32> @test2_16xi32_perm_mas
 define <16 x i32> @test2_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_16xi32_perm_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8555,7 +8555,7 @@ define <16 x i32> @test2_masked_16xi32_p
 define <16 x i32> @test2_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8586,7 +8586,7 @@ define <16 x i32> @test2_16xi32_perm_mem
 define <16 x i32> @test2_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8605,7 +8605,7 @@ define <16 x i32> @test2_masked_16xi32_p
 define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8624,7 +8624,7 @@ define <16 x i32> @test2_masked_z_16xi32
 define <16 x i32> @test2_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8643,7 +8643,7 @@ define <16 x i32> @test2_masked_16xi32_p
 define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8662,7 +8662,7 @@ define <16 x i32> @test2_masked_z_16xi32
 define <16 x i32> @test2_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8681,7 +8681,7 @@ define <16 x i32> @test2_masked_16xi32_p
 define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8714,7 +8714,7 @@ define <16 x i32> @test2_16xi32_perm_mem
 define <16 x i32> @test2_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8733,7 +8733,7 @@ define <16 x i32> @test2_masked_16xi32_p
 define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %mask) {
 ; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8765,7 +8765,7 @@ define <8 x float> @test2_8xfloat_shuff_
 define <8 x float> @test2_8xfloat_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8785,7 +8785,7 @@ define <8 x float> @test2_8xfloat_masked
 define <8 x float> @test2_8xfloat_zero_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
 ; GENERIC-LABEL: test2_8xfloat_zero_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8802,7 +8802,7 @@ define <8 x float> @test2_8xfloat_zero_m
 define <8 x float> @test2_8xfloat_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8822,7 +8822,7 @@ define <8 x float> @test2_8xfloat_masked
 define <8 x float> @test2_8xfloat_zero_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
 ; GENERIC-LABEL: test2_8xfloat_zero_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8839,7 +8839,7 @@ define <8 x float> @test2_8xfloat_zero_m
 define <8 x float> @test2_8xfloat_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8859,7 +8859,7 @@ define <8 x float> @test2_8xfloat_masked
 define <8 x float> @test2_8xfloat_zero_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
 ; GENERIC-LABEL: test2_8xfloat_zero_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8889,7 +8889,7 @@ define <8 x float> @test2_8xfloat_shuff_
 define <8 x float> @test2_8xfloat_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8909,7 +8909,7 @@ define <8 x float> @test2_8xfloat_masked
 define <8 x float> @test_8xfloat_zero_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8940,7 +8940,7 @@ define <8 x float> @test_8xfloat_shuff_m
 define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8961,7 +8961,7 @@ define <8 x float> @test_8xfloat_masked_
 define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8980,7 +8980,7 @@ define <8 x float> @test_8xfloat_zero_ma
 define <8 x float> @test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9001,7 +9001,7 @@ define <8 x float> @test_8xfloat_masked_
 define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9020,7 +9020,7 @@ define <8 x float> @test_8xfloat_zero_ma
 define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9041,7 +9041,7 @@ define <8 x float> @test_8xfloat_masked_
 define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9074,7 +9074,7 @@ define <8 x float> @test_8xfloat_shuff_m
 define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9095,7 +9095,7 @@ define <8 x float> @test_8xfloat_masked_
 define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9127,7 +9127,7 @@ define <16 x float> @test_16xfloat_shuff
 define <16 x float> @test_16xfloat_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9147,7 +9147,7 @@ define <16 x float> @test_16xfloat_maske
 define <16 x float> @test_16xfloat_zero_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9164,7 +9164,7 @@ define <16 x float> @test_16xfloat_zero_
 define <16 x float> @test_16xfloat_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9184,7 +9184,7 @@ define <16 x float> @test_16xfloat_maske
 define <16 x float> @test_16xfloat_zero_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9201,7 +9201,7 @@ define <16 x float> @test_16xfloat_zero_
 define <16 x float> @test_16xfloat_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9221,7 +9221,7 @@ define <16 x float> @test_16xfloat_maske
 define <16 x float> @test_16xfloat_zero_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9251,7 +9251,7 @@ define <16 x float> @test_16xfloat_shuff
 define <16 x float> @test_16xfloat_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9271,7 +9271,7 @@ define <16 x float> @test_16xfloat_maske
 define <16 x float> @test_16xfloat_zero_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9302,7 +9302,7 @@ define <16 x float> @test_16xfloat_shuff
 define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9323,7 +9323,7 @@ define <16 x float> @test_16xfloat_maske
 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9342,7 +9342,7 @@ define <16 x float> @test_16xfloat_zero_
 define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9363,7 +9363,7 @@ define <16 x float> @test_16xfloat_maske
 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9382,7 +9382,7 @@ define <16 x float> @test_16xfloat_zero_
 define <16 x float> @test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9403,7 +9403,7 @@ define <16 x float> @test_16xfloat_maske
 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9436,7 +9436,7 @@ define <16 x float> @test_16xfloat_shuff
 define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9457,7 +9457,7 @@ define <16 x float> @test_16xfloat_maske
 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9489,7 +9489,7 @@ define <4 x double> @test_4xdouble_shuff
 define <4 x double> @test_4xdouble_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9509,7 +9509,7 @@ define <4 x double> @test_4xdouble_maske
 define <4 x double> @test_4xdouble_zero_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9526,7 +9526,7 @@ define <4 x double> @test_4xdouble_zero_
 define <4 x double> @test_4xdouble_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9546,7 +9546,7 @@ define <4 x double> @test_4xdouble_maske
 define <4 x double> @test_4xdouble_zero_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9563,7 +9563,7 @@ define <4 x double> @test_4xdouble_zero_
 define <4 x double> @test_4xdouble_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9583,7 +9583,7 @@ define <4 x double> @test_4xdouble_maske
 define <4 x double> @test_4xdouble_zero_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9613,7 +9613,7 @@ define <4 x double> @test_4xdouble_shuff
 define <4 x double> @test_4xdouble_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9633,7 +9633,7 @@ define <4 x double> @test_4xdouble_maske
 define <4 x double> @test_4xdouble_zero_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9664,7 +9664,7 @@ define <4 x double> @test_4xdouble_shuff
 define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9685,7 +9685,7 @@ define <4 x double> @test_4xdouble_maske
 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9704,7 +9704,7 @@ define <4 x double> @test_4xdouble_zero_
 define <4 x double> @test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9725,7 +9725,7 @@ define <4 x double> @test_4xdouble_maske
 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9744,7 +9744,7 @@ define <4 x double> @test_4xdouble_zero_
 define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9765,7 +9765,7 @@ define <4 x double> @test_4xdouble_maske
 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9798,7 +9798,7 @@ define <4 x double> @test_4xdouble_shuff
 define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9819,7 +9819,7 @@ define <4 x double> @test_4xdouble_maske
 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9851,7 +9851,7 @@ define <8 x double> @test_8xdouble_shuff
 define <8 x double> @test_8xdouble_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm2, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9871,7 +9871,7 @@ define <8 x double> @test_8xdouble_maske
 define <8 x double> @test_8xdouble_zero_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9888,7 +9888,7 @@ define <8 x double> @test_8xdouble_zero_
 define <8 x double> @test_8xdouble_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm2, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9908,7 +9908,7 @@ define <8 x double> @test_8xdouble_maske
 define <8 x double> @test_8xdouble_zero_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9925,7 +9925,7 @@ define <8 x double> @test_8xdouble_zero_
 define <8 x double> @test_8xdouble_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm2, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9945,7 +9945,7 @@ define <8 x double> @test_8xdouble_maske
 define <8 x double> @test_8xdouble_zero_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -9975,7 +9975,7 @@ define <8 x double> @test_8xdouble_shuff
 define <8 x double> @test_8xdouble_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm2, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -9995,7 +9995,7 @@ define <8 x double> @test_8xdouble_maske
 define <8 x double> @test_8xdouble_zero_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10026,7 +10026,7 @@ define <8 x double> @test_8xdouble_shuff
 define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10047,7 +10047,7 @@ define <8 x double> @test_8xdouble_maske
 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10066,7 +10066,7 @@ define <8 x double> @test_8xdouble_zero_
 define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10087,7 +10087,7 @@ define <8 x double> @test_8xdouble_maske
 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10106,7 +10106,7 @@ define <8 x double> @test_8xdouble_zero_
 define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10127,7 +10127,7 @@ define <8 x double> @test_8xdouble_maske
 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10160,7 +10160,7 @@ define <8 x double> @test_8xdouble_shuff
 define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10181,7 +10181,7 @@ define <8 x double> @test_8xdouble_maske
 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10213,7 +10213,7 @@ define <8 x i32> @test_8xi32_shuff_mask0
 define <8 x i32> @test_8xi32_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xi32_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10233,7 +10233,7 @@ define <8 x i32> @test_8xi32_masked_shuf
 define <8 x i32> @test_8xi32_zero_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10250,7 +10250,7 @@ define <8 x i32> @test_8xi32_zero_masked
 define <8 x i32> @test_8xi32_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xi32_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10270,7 +10270,7 @@ define <8 x i32> @test_8xi32_masked_shuf
 define <8 x i32> @test_8xi32_zero_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10287,7 +10287,7 @@ define <8 x i32> @test_8xi32_zero_masked
 define <8 x i32> @test_8xi32_masked_shuff_mask2(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xi32_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10307,7 +10307,7 @@ define <8 x i32> @test_8xi32_masked_shuf
 define <8 x i32> @test_8xi32_zero_masked_shuff_mask2(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10337,7 +10337,7 @@ define <8 x i32> @test_8xi32_shuff_mask3
 define <8 x i32> @test_8xi32_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xi32_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10357,7 +10357,7 @@ define <8 x i32> @test_8xi32_masked_shuf
 define <8 x i32> @test_8xi32_zero_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10388,7 +10388,7 @@ define <8 x i32> @test_8xi32_shuff_mem_m
 define <8 x i32> @test_8xi32_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10409,7 +10409,7 @@ define <8 x i32> @test_8xi32_masked_shuf
 define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10428,7 +10428,7 @@ define <8 x i32> @test_8xi32_zero_masked
 define <8 x i32> @test_8xi32_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10449,7 +10449,7 @@ define <8 x i32> @test_8xi32_masked_shuf
 define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10468,7 +10468,7 @@ define <8 x i32> @test_8xi32_zero_masked
 define <8 x i32> @test_8xi32_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10489,7 +10489,7 @@ define <8 x i32> @test_8xi32_masked_shuf
 define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10522,7 +10522,7 @@ define <8 x i32> @test_8xi32_shuff_mem_m
 define <8 x i32> @test_8xi32_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10543,7 +10543,7 @@ define <8 x i32> @test_8xi32_masked_shuf
 define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10575,7 +10575,7 @@ define <16 x i32> @test_16xi32_shuff_mas
 define <16 x i32> @test_16xi32_masked_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xi32_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10595,7 +10595,7 @@ define <16 x i32> @test_16xi32_masked_sh
 define <16 x i32> @test_16xi32_zero_masked_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10612,7 +10612,7 @@ define <16 x i32> @test_16xi32_zero_mask
 define <16 x i32> @test_16xi32_masked_shuff_mask1(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xi32_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10632,7 +10632,7 @@ define <16 x i32> @test_16xi32_masked_sh
 define <16 x i32> @test_16xi32_zero_masked_shuff_mask1(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10649,7 +10649,7 @@ define <16 x i32> @test_16xi32_zero_mask
 define <16 x i32> @test_16xi32_masked_shuff_mask2(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xi32_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10669,7 +10669,7 @@ define <16 x i32> @test_16xi32_masked_sh
 define <16 x i32> @test_16xi32_zero_masked_shuff_mask2(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10699,7 +10699,7 @@ define <16 x i32> @test_16xi32_shuff_mas
 define <16 x i32> @test_16xi32_masked_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xi32_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10719,7 +10719,7 @@ define <16 x i32> @test_16xi32_masked_sh
 define <16 x i32> @test_16xi32_zero_masked_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10750,7 +10750,7 @@ define <16 x i32> @test_16xi32_shuff_mem
 define <16 x i32> @test_16xi32_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10771,7 +10771,7 @@ define <16 x i32> @test_16xi32_masked_sh
 define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10790,7 +10790,7 @@ define <16 x i32> @test_16xi32_zero_mask
 define <16 x i32> @test_16xi32_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10811,7 +10811,7 @@ define <16 x i32> @test_16xi32_masked_sh
 define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10830,7 +10830,7 @@ define <16 x i32> @test_16xi32_zero_mask
 define <16 x i32> @test_16xi32_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10851,7 +10851,7 @@ define <16 x i32> @test_16xi32_masked_sh
 define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10884,7 +10884,7 @@ define <16 x i32> @test_16xi32_shuff_mem
 define <16 x i32> @test_16xi32_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10905,7 +10905,7 @@ define <16 x i32> @test_16xi32_masked_sh
 define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10937,7 +10937,7 @@ define <4 x i64> @test_4xi64_shuff_mask0
 define <4 x i64> @test_4xi64_masked_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xi64_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10957,7 +10957,7 @@ define <4 x i64> @test_4xi64_masked_shuf
 define <4 x i64> @test_4xi64_zero_masked_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -10974,7 +10974,7 @@ define <4 x i64> @test_4xi64_zero_masked
 define <4 x i64> @test_4xi64_masked_shuff_mask1(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xi64_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -10994,7 +10994,7 @@ define <4 x i64> @test_4xi64_masked_shuf
 define <4 x i64> @test_4xi64_zero_masked_shuff_mask1(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11011,7 +11011,7 @@ define <4 x i64> @test_4xi64_zero_masked
 define <4 x i64> @test_4xi64_masked_shuff_mask2(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xi64_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11031,7 +11031,7 @@ define <4 x i64> @test_4xi64_masked_shuf
 define <4 x i64> @test_4xi64_zero_masked_shuff_mask2(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11061,7 +11061,7 @@ define <4 x i64> @test_4xi64_shuff_mask3
 define <4 x i64> @test_4xi64_masked_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xi64_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11081,7 +11081,7 @@ define <4 x i64> @test_4xi64_masked_shuf
 define <4 x i64> @test_4xi64_zero_masked_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11112,7 +11112,7 @@ define <4 x i64> @test_4xi64_shuff_mem_m
 define <4 x i64> @test_4xi64_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11133,7 +11133,7 @@ define <4 x i64> @test_4xi64_masked_shuf
 define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11152,7 +11152,7 @@ define <4 x i64> @test_4xi64_zero_masked
 define <4 x i64> @test_4xi64_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11173,7 +11173,7 @@ define <4 x i64> @test_4xi64_masked_shuf
 define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11192,7 +11192,7 @@ define <4 x i64> @test_4xi64_zero_masked
 define <4 x i64> @test_4xi64_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11213,7 +11213,7 @@ define <4 x i64> @test_4xi64_masked_shuf
 define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11246,7 +11246,7 @@ define <4 x i64> @test_4xi64_shuff_mem_m
 define <4 x i64> @test_4xi64_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11267,7 +11267,7 @@ define <4 x i64> @test_4xi64_masked_shuf
 define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11299,7 +11299,7 @@ define <8 x i64> @test_8xi64_shuff_mask0
 define <8 x i64> @test_8xi64_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xi64_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11319,7 +11319,7 @@ define <8 x i64> @test_8xi64_masked_shuf
 define <8 x i64> @test_8xi64_zero_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11336,7 +11336,7 @@ define <8 x i64> @test_8xi64_zero_masked
 define <8 x i64> @test_8xi64_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xi64_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11356,7 +11356,7 @@ define <8 x i64> @test_8xi64_masked_shuf
 define <8 x i64> @test_8xi64_zero_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11373,7 +11373,7 @@ define <8 x i64> @test_8xi64_zero_masked
 define <8 x i64> @test_8xi64_masked_shuff_mask2(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xi64_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11393,7 +11393,7 @@ define <8 x i64> @test_8xi64_masked_shuf
 define <8 x i64> @test_8xi64_zero_masked_shuff_mask2(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11423,7 +11423,7 @@ define <8 x i64> @test_8xi64_shuff_mask3
 define <8 x i64> @test_8xi64_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xi64_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11443,7 +11443,7 @@ define <8 x i64> @test_8xi64_masked_shuf
 define <8 x i64> @test_8xi64_zero_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11474,7 +11474,7 @@ define <8 x i64> @test_8xi64_shuff_mem_m
 define <8 x i64> @test_8xi64_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11495,7 +11495,7 @@ define <8 x i64> @test_8xi64_masked_shuf
 define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11514,7 +11514,7 @@ define <8 x i64> @test_8xi64_zero_masked
 define <8 x i64> @test_8xi64_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11535,7 +11535,7 @@ define <8 x i64> @test_8xi64_masked_shuf
 define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11554,7 +11554,7 @@ define <8 x i64> @test_8xi64_zero_masked
 define <8 x i64> @test_8xi64_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11575,7 +11575,7 @@ define <8 x i64> @test_8xi64_masked_shuf
 define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11608,7 +11608,7 @@ define <8 x i64> @test_8xi64_shuff_mem_m
 define <8 x i64> @test_8xi64_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11629,7 +11629,7 @@ define <8 x i64> @test_8xi64_masked_shuf
 define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11661,7 +11661,7 @@ define <4 x float> @test_4xfloat_unpack_
 define <4 x float> @test_4xfloat_masked_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm3, %xmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11681,7 +11681,7 @@ define <4 x float> @test_4xfloat_masked_
 define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11698,7 +11698,7 @@ define <4 x float> @test_4xfloat_zero_ma
 define <4 x float> @test_4xfloat_masked_unpack_low_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm3, %xmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11718,7 +11718,7 @@ define <4 x float> @test_4xfloat_masked_
 define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11735,7 +11735,7 @@ define <4 x float> @test_4xfloat_zero_ma
 define <4 x float> @test_4xfloat_masked_unpack_low_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm3, %xmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11755,7 +11755,7 @@ define <4 x float> @test_4xfloat_masked_
 define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11785,7 +11785,7 @@ define <4 x float> @test_4xfloat_unpack_
 define <4 x float> @test_4xfloat_masked_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm3, %xmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11805,7 +11805,7 @@ define <4 x float> @test_4xfloat_masked_
 define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11836,7 +11836,7 @@ define <4 x float> @test_4xfloat_unpack_
 define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11857,7 +11857,7 @@ define <4 x float> @test_4xfloat_masked_
 define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11876,7 +11876,7 @@ define <4 x float> @test_4xfloat_zero_ma
 define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11897,7 +11897,7 @@ define <4 x float> @test_4xfloat_masked_
 define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11916,7 +11916,7 @@ define <4 x float> @test_4xfloat_zero_ma
 define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11937,7 +11937,7 @@ define <4 x float> @test_4xfloat_masked_
 define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -11970,7 +11970,7 @@ define <4 x float> @test_4xfloat_unpack_
 define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -11991,7 +11991,7 @@ define <4 x float> @test_4xfloat_masked_
 define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12023,7 +12023,7 @@ define <8 x float> @test_8xfloat_unpack_
 define <8 x float> @test_8xfloat_masked_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12043,7 +12043,7 @@ define <8 x float> @test_8xfloat_masked_
 define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12060,7 +12060,7 @@ define <8 x float> @test_8xfloat_zero_ma
 define <8 x float> @test_8xfloat_masked_unpack_low_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12080,7 +12080,7 @@ define <8 x float> @test_8xfloat_masked_
 define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12097,7 +12097,7 @@ define <8 x float> @test_8xfloat_zero_ma
 define <8 x float> @test_8xfloat_masked_unpack_low_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12117,7 +12117,7 @@ define <8 x float> @test_8xfloat_masked_
 define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12147,7 +12147,7 @@ define <8 x float> @test_8xfloat_unpack_
 define <8 x float> @test_8xfloat_masked_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12167,7 +12167,7 @@ define <8 x float> @test_8xfloat_masked_
 define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12198,7 +12198,7 @@ define <8 x float> @test_8xfloat_unpack_
 define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12219,7 +12219,7 @@ define <8 x float> @test_8xfloat_masked_
 define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12238,7 +12238,7 @@ define <8 x float> @test_8xfloat_zero_ma
 define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12259,7 +12259,7 @@ define <8 x float> @test_8xfloat_masked_
 define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12278,7 +12278,7 @@ define <8 x float> @test_8xfloat_zero_ma
 define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12299,7 +12299,7 @@ define <8 x float> @test_8xfloat_masked_
 define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12332,7 +12332,7 @@ define <8 x float> @test_8xfloat_unpack_
 define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12353,7 +12353,7 @@ define <8 x float> @test_8xfloat_masked_
 define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12385,7 +12385,7 @@ define <16 x float> @test_16xfloat_unpac
 define <16 x float> @test_16xfloat_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12405,7 +12405,7 @@ define <16 x float> @test_16xfloat_maske
 define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12422,7 +12422,7 @@ define <16 x float> @test_16xfloat_zero_
 define <16 x float> @test_16xfloat_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12442,7 +12442,7 @@ define <16 x float> @test_16xfloat_maske
 define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12459,7 +12459,7 @@ define <16 x float> @test_16xfloat_zero_
 define <16 x float> @test_16xfloat_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12479,7 +12479,7 @@ define <16 x float> @test_16xfloat_maske
 define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12509,7 +12509,7 @@ define <16 x float> @test_16xfloat_unpac
 define <16 x float> @test_16xfloat_masked_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12529,7 +12529,7 @@ define <16 x float> @test_16xfloat_maske
 define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12560,7 +12560,7 @@ define <16 x float> @test_16xfloat_unpac
 define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12581,7 +12581,7 @@ define <16 x float> @test_16xfloat_maske
 define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12600,7 +12600,7 @@ define <16 x float> @test_16xfloat_zero_
 define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12621,7 +12621,7 @@ define <16 x float> @test_16xfloat_maske
 define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12640,7 +12640,7 @@ define <16 x float> @test_16xfloat_zero_
 define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12661,7 +12661,7 @@ define <16 x float> @test_16xfloat_maske
 define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12694,7 +12694,7 @@ define <16 x float> @test_16xfloat_unpac
 define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12715,7 +12715,7 @@ define <16 x float> @test_16xfloat_maske
 define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12747,7 +12747,7 @@ define <2 x double> @test_2xdouble_unpac
 define <2 x double> @test_2xdouble_masked_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) {
 ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %xmm3, %xmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12767,7 +12767,7 @@ define <2 x double> @test_2xdouble_maske
 define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) {
 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12784,7 +12784,7 @@ define <2 x double> @test_2xdouble_zero_
 define <2 x double> @test_2xdouble_masked_unpack_low_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) {
 ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %xmm3, %xmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12804,7 +12804,7 @@ define <2 x double> @test_2xdouble_maske
 define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) {
 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12835,7 +12835,7 @@ define <2 x double> @test_2xdouble_unpac
 define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) {
 ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12856,7 +12856,7 @@ define <2 x double> @test_2xdouble_maske
 define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) {
 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12875,7 +12875,7 @@ define <2 x double> @test_2xdouble_zero_
 define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) {
 ; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12896,7 +12896,7 @@ define <2 x double> @test_2xdouble_maske
 define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) {
 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12928,7 +12928,7 @@ define <4 x double> @test_4xdouble_unpac
 define <4 x double> @test_4xdouble_masked_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12948,7 +12948,7 @@ define <4 x double> @test_4xdouble_maske
 define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -12965,7 +12965,7 @@ define <4 x double> @test_4xdouble_zero_
 define <4 x double> @test_4xdouble_masked_unpack_low_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -12985,7 +12985,7 @@ define <4 x double> @test_4xdouble_maske
 define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13002,7 +13002,7 @@ define <4 x double> @test_4xdouble_zero_
 define <4 x double> @test_4xdouble_masked_unpack_low_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13022,7 +13022,7 @@ define <4 x double> @test_4xdouble_maske
 define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13052,7 +13052,7 @@ define <4 x double> @test_4xdouble_unpac
 define <4 x double> @test_4xdouble_masked_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13072,7 +13072,7 @@ define <4 x double> @test_4xdouble_maske
 define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13103,7 +13103,7 @@ define <4 x double> @test_4xdouble_unpac
 define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13124,7 +13124,7 @@ define <4 x double> @test_4xdouble_maske
 define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13143,7 +13143,7 @@ define <4 x double> @test_4xdouble_zero_
 define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13164,7 +13164,7 @@ define <4 x double> @test_4xdouble_maske
 define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13183,7 +13183,7 @@ define <4 x double> @test_4xdouble_zero_
 define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13204,7 +13204,7 @@ define <4 x double> @test_4xdouble_maske
 define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13237,7 +13237,7 @@ define <4 x double> @test_4xdouble_unpac
 define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13258,7 +13258,7 @@ define <4 x double> @test_4xdouble_maske
 define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13290,7 +13290,7 @@ define <8 x double> @test_8xdouble_unpac
 define <8 x double> @test_8xdouble_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm2, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13310,7 +13310,7 @@ define <8 x double> @test_8xdouble_maske
 define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13327,7 +13327,7 @@ define <8 x double> @test_8xdouble_zero_
 define <8 x double> @test_8xdouble_masked_unpack_low_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm2, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13347,7 +13347,7 @@ define <8 x double> @test_8xdouble_maske
 define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13364,7 +13364,7 @@ define <8 x double> @test_8xdouble_zero_
 define <8 x double> @test_8xdouble_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm2, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13384,7 +13384,7 @@ define <8 x double> @test_8xdouble_maske
 define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13414,7 +13414,7 @@ define <8 x double> @test_8xdouble_unpac
 define <8 x double> @test_8xdouble_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm2, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13434,7 +13434,7 @@ define <8 x double> @test_8xdouble_maske
 define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13465,7 +13465,7 @@ define <8 x double> @test_8xdouble_unpac
 define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13486,7 +13486,7 @@ define <8 x double> @test_8xdouble_maske
 define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13505,7 +13505,7 @@ define <8 x double> @test_8xdouble_zero_
 define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13526,7 +13526,7 @@ define <8 x double> @test_8xdouble_maske
 define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13545,7 +13545,7 @@ define <8 x double> @test_8xdouble_zero_
 define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13566,7 +13566,7 @@ define <8 x double> @test_8xdouble_maske
 define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13599,7 +13599,7 @@ define <8 x double> @test_8xdouble_unpac
 define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13620,7 +13620,7 @@ define <8 x double> @test_8xdouble_maske
 define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13652,7 +13652,7 @@ define <4 x float> @test_4xfloat_unpack_
 define <4 x float> @test_4xfloat_masked_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm3, %xmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13672,7 +13672,7 @@ define <4 x float> @test_4xfloat_masked_
 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13689,7 +13689,7 @@ define <4 x float> @test_4xfloat_zero_ma
 define <4 x float> @test_4xfloat_masked_unpack_high_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm3, %xmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13709,7 +13709,7 @@ define <4 x float> @test_4xfloat_masked_
 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13726,7 +13726,7 @@ define <4 x float> @test_4xfloat_zero_ma
 define <4 x float> @test_4xfloat_masked_unpack_high_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm3, %xmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13746,7 +13746,7 @@ define <4 x float> @test_4xfloat_masked_
 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13776,7 +13776,7 @@ define <4 x float> @test_4xfloat_unpack_
 define <4 x float> @test_4xfloat_masked_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm3, %xmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13796,7 +13796,7 @@ define <4 x float> @test_4xfloat_masked_
 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13827,7 +13827,7 @@ define <4 x float> @test_4xfloat_unpack_
 define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13848,7 +13848,7 @@ define <4 x float> @test_4xfloat_masked_
 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13867,7 +13867,7 @@ define <4 x float> @test_4xfloat_zero_ma
 define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13888,7 +13888,7 @@ define <4 x float> @test_4xfloat_masked_
 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13907,7 +13907,7 @@ define <4 x float> @test_4xfloat_zero_ma
 define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13928,7 +13928,7 @@ define <4 x float> @test_4xfloat_masked_
 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -13961,7 +13961,7 @@ define <4 x float> @test_4xfloat_unpack_
 define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -13982,7 +13982,7 @@ define <4 x float> @test_4xfloat_masked_
 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) {
 ; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14014,7 +14014,7 @@ define <8 x float> @test_8xfloat_unpack_
 define <8 x float> @test_8xfloat_masked_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14034,7 +14034,7 @@ define <8 x float> @test_8xfloat_masked_
 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14051,7 +14051,7 @@ define <8 x float> @test_8xfloat_zero_ma
 define <8 x float> @test_8xfloat_masked_unpack_high_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14071,7 +14071,7 @@ define <8 x float> @test_8xfloat_masked_
 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14088,7 +14088,7 @@ define <8 x float> @test_8xfloat_zero_ma
 define <8 x float> @test_8xfloat_masked_unpack_high_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14108,7 +14108,7 @@ define <8 x float> @test_8xfloat_masked_
 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14138,7 +14138,7 @@ define <8 x float> @test_8xfloat_unpack_
 define <8 x float> @test_8xfloat_masked_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14158,7 +14158,7 @@ define <8 x float> @test_8xfloat_masked_
 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14189,7 +14189,7 @@ define <8 x float> @test_8xfloat_unpack_
 define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14210,7 +14210,7 @@ define <8 x float> @test_8xfloat_masked_
 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14229,7 +14229,7 @@ define <8 x float> @test_8xfloat_zero_ma
 define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14250,7 +14250,7 @@ define <8 x float> @test_8xfloat_masked_
 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14269,7 +14269,7 @@ define <8 x float> @test_8xfloat_zero_ma
 define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14290,7 +14290,7 @@ define <8 x float> @test_8xfloat_masked_
 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14323,7 +14323,7 @@ define <8 x float> @test_8xfloat_unpack_
 define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14344,7 +14344,7 @@ define <8 x float> @test_8xfloat_masked_
 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
 ; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14376,7 +14376,7 @@ define <16 x float> @test_16xfloat_unpac
 define <16 x float> @test_16xfloat_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14396,7 +14396,7 @@ define <16 x float> @test_16xfloat_maske
 define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14413,7 +14413,7 @@ define <16 x float> @test_16xfloat_zero_
 define <16 x float> @test_16xfloat_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14433,7 +14433,7 @@ define <16 x float> @test_16xfloat_maske
 define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14450,7 +14450,7 @@ define <16 x float> @test_16xfloat_zero_
 define <16 x float> @test_16xfloat_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14470,7 +14470,7 @@ define <16 x float> @test_16xfloat_maske
 define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14500,7 +14500,7 @@ define <16 x float> @test_16xfloat_unpac
 define <16 x float> @test_16xfloat_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm2, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14520,7 +14520,7 @@ define <16 x float> @test_16xfloat_maske
 define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14551,7 +14551,7 @@ define <16 x float> @test_16xfloat_unpac
 define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14572,7 +14572,7 @@ define <16 x float> @test_16xfloat_maske
 define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14591,7 +14591,7 @@ define <16 x float> @test_16xfloat_zero_
 define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14612,7 +14612,7 @@ define <16 x float> @test_16xfloat_maske
 define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14631,7 +14631,7 @@ define <16 x float> @test_16xfloat_zero_
 define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14652,7 +14652,7 @@ define <16 x float> @test_16xfloat_maske
 define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14685,7 +14685,7 @@ define <16 x float> @test_16xfloat_unpac
 define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14706,7 +14706,7 @@ define <16 x float> @test_16xfloat_maske
 define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) {
 ; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14738,7 +14738,7 @@ define <2 x double> @test_2xdouble_unpac
 define <2 x double> @test_2xdouble_masked_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) {
 ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %xmm3, %xmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14758,7 +14758,7 @@ define <2 x double> @test_2xdouble_maske
 define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) {
 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14775,7 +14775,7 @@ define <2 x double> @test_2xdouble_zero_
 define <2 x double> @test_2xdouble_masked_unpack_high_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) {
 ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %xmm3, %xmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %xmm2, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14795,7 +14795,7 @@ define <2 x double> @test_2xdouble_maske
 define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) {
 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14826,7 +14826,7 @@ define <2 x double> @test_2xdouble_unpac
 define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) {
 ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14847,7 +14847,7 @@ define <2 x double> @test_2xdouble_maske
 define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) {
 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14866,7 +14866,7 @@ define <2 x double> @test_2xdouble_zero_
 define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) {
 ; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %xmm2, %xmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14887,7 +14887,7 @@ define <2 x double> @test_2xdouble_maske
 define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) {
 ; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %xmm1, %xmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14919,7 +14919,7 @@ define <4 x double> @test_4xdouble_unpac
 define <4 x double> @test_4xdouble_masked_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14939,7 +14939,7 @@ define <4 x double> @test_4xdouble_maske
 define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14956,7 +14956,7 @@ define <4 x double> @test_4xdouble_zero_
 define <4 x double> @test_4xdouble_masked_unpack_high_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -14976,7 +14976,7 @@ define <4 x double> @test_4xdouble_maske
 define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -14993,7 +14993,7 @@ define <4 x double> @test_4xdouble_zero_
 define <4 x double> @test_4xdouble_masked_unpack_high_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15013,7 +15013,7 @@ define <4 x double> @test_4xdouble_maske
 define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15043,7 +15043,7 @@ define <4 x double> @test_4xdouble_unpac
 define <4 x double> @test_4xdouble_masked_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15063,7 +15063,7 @@ define <4 x double> @test_4xdouble_maske
 define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15094,7 +15094,7 @@ define <4 x double> @test_4xdouble_unpac
 define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15115,7 +15115,7 @@ define <4 x double> @test_4xdouble_maske
 define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15134,7 +15134,7 @@ define <4 x double> @test_4xdouble_zero_
 define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15155,7 +15155,7 @@ define <4 x double> @test_4xdouble_maske
 define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15174,7 +15174,7 @@ define <4 x double> @test_4xdouble_zero_
 define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15195,7 +15195,7 @@ define <4 x double> @test_4xdouble_maske
 define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15228,7 +15228,7 @@ define <4 x double> @test_4xdouble_unpac
 define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %ymm1, %ymm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15249,7 +15249,7 @@ define <4 x double> @test_4xdouble_maske
 define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) {
 ; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15281,7 +15281,7 @@ define <8 x double> @test_8xdouble_unpac
 define <8 x double> @test_8xdouble_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm2, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15301,7 +15301,7 @@ define <8 x double> @test_8xdouble_maske
 define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15318,7 +15318,7 @@ define <8 x double> @test_8xdouble_zero_
 define <8 x double> @test_8xdouble_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm2, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15338,7 +15338,7 @@ define <8 x double> @test_8xdouble_maske
 define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15355,7 +15355,7 @@ define <8 x double> @test_8xdouble_zero_
 define <8 x double> @test_8xdouble_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm2, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15375,7 +15375,7 @@ define <8 x double> @test_8xdouble_maske
 define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15405,7 +15405,7 @@ define <8 x double> @test_8xdouble_unpac
 define <8 x double> @test_8xdouble_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm2, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15425,7 +15425,7 @@ define <8 x double> @test_8xdouble_maske
 define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15456,7 +15456,7 @@ define <8 x double> @test_8xdouble_unpac
 define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15477,7 +15477,7 @@ define <8 x double> @test_8xdouble_maske
 define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15496,7 +15496,7 @@ define <8 x double> @test_8xdouble_zero_
 define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15517,7 +15517,7 @@ define <8 x double> @test_8xdouble_maske
 define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15536,7 +15536,7 @@ define <8 x double> @test_8xdouble_zero_
 define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15557,7 +15557,7 @@ define <8 x double> @test_8xdouble_maske
 define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -15590,7 +15590,7 @@ define <8 x double> @test_8xdouble_unpac
 define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -15611,7 +15611,7 @@ define <8 x double> @test_8xdouble_maske
 define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
 ; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [6:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;

Modified: llvm/trunk/test/CodeGen/X86/mmx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mmx-schedule.ll?rev=330480&r1=330479&r2=330480&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mmx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mmx-schedule.ll Fri Apr 20 14:16:05 2018
@@ -16,7 +16,7 @@ define i64 @test_cvtpd2pi(<2 x double> %
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    cvtpd2pi (%rdi), %mm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    cvtpd2pi %xmm0, %mm1 # sched: [4:1.00]
-; GENERIC-NEXT:    por %mm1, %mm0 # sched: [1:1.00]
+; GENERIC-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -40,7 +40,7 @@ define i64 @test_cvtpd2pi(<2 x double> %
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    cvtpd2pi (%rdi), %mm0 # sched: [10:1.00]
 ; SANDY-NEXT:    cvtpd2pi %xmm0, %mm1 # sched: [4:1.00]
-; SANDY-NEXT:    por %mm1, %mm0 # sched: [1:1.00]
+; SANDY-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -261,7 +261,7 @@ define i64 @test_cvtps2pi(<4 x float> %a
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [9:1.00]
-; GENERIC-NEXT:    por %mm0, %mm1 # sched: [1:1.00]
+; GENERIC-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
 ; GENERIC-NEXT:    movq %mm1, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -285,7 +285,7 @@ define i64 @test_cvtps2pi(<4 x float> %a
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    cvtps2pi %xmm0, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    cvtps2pi (%rdi), %mm1 # sched: [9:1.00]
-; SANDY-NEXT:    por %mm0, %mm1 # sched: [1:1.00]
+; SANDY-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
 ; SANDY-NEXT:    movq %mm1, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -350,7 +350,7 @@ define i64 @test_cvttpd2pi(<2 x double>
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    cvttpd2pi (%rdi), %mm0 # sched: [10:1.00]
 ; GENERIC-NEXT:    cvttpd2pi %xmm0, %mm1 # sched: [4:1.00]
-; GENERIC-NEXT:    por %mm1, %mm0 # sched: [1:1.00]
+; GENERIC-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -374,7 +374,7 @@ define i64 @test_cvttpd2pi(<2 x double>
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    cvttpd2pi (%rdi), %mm0 # sched: [10:1.00]
 ; SANDY-NEXT:    cvttpd2pi %xmm0, %mm1 # sched: [4:1.00]
-; SANDY-NEXT:    por %mm1, %mm0 # sched: [1:1.00]
+; SANDY-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -439,7 +439,7 @@ define i64 @test_cvttps2pi(<4 x float> %
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [9:1.00]
-; GENERIC-NEXT:    por %mm0, %mm1 # sched: [1:1.00]
+; GENERIC-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
 ; GENERIC-NEXT:    movq %mm1, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -463,7 +463,7 @@ define i64 @test_cvttps2pi(<4 x float> %
 ; SANDY:       # %bb.0:
 ; SANDY-NEXT:    cvttps2pi %xmm0, %mm0 # sched: [3:1.00]
 ; SANDY-NEXT:    cvttps2pi (%rdi), %mm1 # sched: [9:1.00]
-; SANDY-NEXT:    por %mm0, %mm1 # sched: [1:1.00]
+; SANDY-NEXT:    por %mm0, %mm1 # sched: [1:0.33]
 ; SANDY-NEXT:    movq %mm1, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2198,8 +2198,8 @@ declare x86_mmx @llvm.x86.mmx.palignr.b(
 define i64 @test_pand(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
 ; GENERIC-LABEL: test_pand:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pand %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    pand (%rdi), %mm0 # sched: [6:1.00]
+; GENERIC-NEXT:    pand %mm1, %mm0 # sched: [1:0.33]
+; GENERIC-NEXT:    pand (%rdi), %mm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2219,8 +2219,8 @@ define i64 @test_pand(x86_mmx %a0, x86_m
 ;
 ; SANDY-LABEL: test_pand:
 ; SANDY:       # %bb.0:
-; SANDY-NEXT:    pand %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    pand (%rdi), %mm0 # sched: [6:1.00]
+; SANDY-NEXT:    pand %mm1, %mm0 # sched: [1:0.33]
+; SANDY-NEXT:    pand (%rdi), %mm0 # sched: [6:0.50]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2276,8 +2276,8 @@ declare x86_mmx @llvm.x86.mmx.pand(x86_m
 define i64 @test_pandn(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
 ; GENERIC-LABEL: test_pandn:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pandn %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    pandn (%rdi), %mm0 # sched: [6:1.00]
+; GENERIC-NEXT:    pandn %mm1, %mm0 # sched: [1:0.33]
+; GENERIC-NEXT:    pandn (%rdi), %mm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2297,8 +2297,8 @@ define i64 @test_pandn(x86_mmx %a0, x86_
 ;
 ; SANDY-LABEL: test_pandn:
 ; SANDY:       # %bb.0:
-; SANDY-NEXT:    pandn %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    pandn (%rdi), %mm0 # sched: [6:1.00]
+; SANDY-NEXT:    pandn %mm1, %mm0 # sched: [1:0.33]
+; SANDY-NEXT:    pandn (%rdi), %mm0 # sched: [6:0.50]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4503,8 +4503,8 @@ declare x86_mmx @llvm.x86.mmx.pmulu.dq(x
 define i64 @test_por(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
 ; GENERIC-LABEL: test_por:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    por %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    por (%rdi), %mm0 # sched: [6:1.00]
+; GENERIC-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
+; GENERIC-NEXT:    por (%rdi), %mm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4524,8 +4524,8 @@ define i64 @test_por(x86_mmx %a0, x86_mm
 ;
 ; SANDY-LABEL: test_por:
 ; SANDY:       # %bb.0:
-; SANDY-NEXT:    por %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    por (%rdi), %mm0 # sched: [6:1.00]
+; SANDY-NEXT:    por %mm1, %mm0 # sched: [1:0.33]
+; SANDY-NEXT:    por (%rdi), %mm0 # sched: [6:0.50]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6861,8 +6861,8 @@ declare x86_mmx @llvm.x86.mmx.punpcklwd(
 define i64 @test_pxor(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
 ; GENERIC-LABEL: test_pxor:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    pxor %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT:    pxor (%rdi), %mm0 # sched: [6:1.00]
+; GENERIC-NEXT:    pxor %mm1, %mm0 # sched: [1:0.33]
+; GENERIC-NEXT:    pxor (%rdi), %mm0 # sched: [6:0.50]
 ; GENERIC-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6882,8 +6882,8 @@ define i64 @test_pxor(x86_mmx %a0, x86_m
 ;
 ; SANDY-LABEL: test_pxor:
 ; SANDY:       # %bb.0:
-; SANDY-NEXT:    pxor %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT:    pxor (%rdi), %mm0 # sched: [6:1.00]
+; SANDY-NEXT:    pxor %mm1, %mm0 # sched: [1:0.33]
+; SANDY-NEXT:    pxor (%rdi), %mm0 # sched: [6:0.50]
 ; SANDY-NEXT:    movq %mm0, %rax # sched: [1:0.33]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;

Modified: llvm/trunk/test/CodeGen/X86/x87-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x87-schedule.ll?rev=330480&r1=330479&r2=330480&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x87-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/x87-schedule.ll Fri Apr 20 14:16:05 2018
@@ -124,28 +124,28 @@ define void @test_fabs() optsize {
 ; BROADWELL-LABEL: test_fabs:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    fabs # sched: [1:0.33]
+; BROADWELL-NEXT:    fabs # sched: [1:1.00]
 ; BROADWELL-NEXT:    #NO_APP
 ; BROADWELL-NEXT:    retl # sched: [6:0.50]
 ;
 ; SKYLAKE-LABEL: test_fabs:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    fabs # sched: [1:0.33]
+; SKYLAKE-NEXT:    fabs # sched: [1:1.00]
 ; SKYLAKE-NEXT:    #NO_APP
 ; SKYLAKE-NEXT:    retl # sched: [6:0.50]
 ;
 ; SKX-LABEL: test_fabs:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    fabs # sched: [1:0.33]
+; SKX-NEXT:    fabs # sched: [1:1.00]
 ; SKX-NEXT:    #NO_APP
 ; SKX-NEXT:    retl # sched: [6:0.50]
 ;
 ; BTVER2-LABEL: test_fabs:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    fabs # sched: [1:0.50]
+; BTVER2-NEXT:    fabs # sched: [2:1.00]
 ; BTVER2-NEXT:    #NO_APP
 ; BTVER2-NEXT:    retl # sched: [4:1.00]
 ;
@@ -540,28 +540,28 @@ define void @test_fchs() optsize {
 ; BROADWELL-LABEL: test_fchs:
 ; BROADWELL:       # %bb.0:
 ; BROADWELL-NEXT:    #APP
-; BROADWELL-NEXT:    fchs # sched: [1:0.33]
+; BROADWELL-NEXT:    fchs # sched: [1:1.00]
 ; BROADWELL-NEXT:    #NO_APP
 ; BROADWELL-NEXT:    retl # sched: [6:0.50]
 ;
 ; SKYLAKE-LABEL: test_fchs:
 ; SKYLAKE:       # %bb.0:
 ; SKYLAKE-NEXT:    #APP
-; SKYLAKE-NEXT:    fchs # sched: [1:0.33]
+; SKYLAKE-NEXT:    fchs # sched: [1:1.00]
 ; SKYLAKE-NEXT:    #NO_APP
 ; SKYLAKE-NEXT:    retl # sched: [6:0.50]
 ;
 ; SKX-LABEL: test_fchs:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    #APP
-; SKX-NEXT:    fchs # sched: [1:0.33]
+; SKX-NEXT:    fchs # sched: [1:1.00]
 ; SKX-NEXT:    #NO_APP
 ; SKX-NEXT:    retl # sched: [6:0.50]
 ;
 ; BTVER2-LABEL: test_fchs:
 ; BTVER2:       # %bb.0:
 ; BTVER2-NEXT:    #APP
-; BTVER2-NEXT:    fchs # sched: [1:0.50]
+; BTVER2-NEXT:    fchs # sched: [2:1.00]
 ; BTVER2-NEXT:    #NO_APP
 ; BTVER2-NEXT:    retl # sched: [4:1.00]
 ;




More information about the llvm-commits mailing list