[llvm] r319945 - [X86][AVX512] Tag mask reg op instruction scheduler classes

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 6 11:36:01 PST 2017


Author: rksimon
Date: Wed Dec  6 11:36:00 2017
New Revision: 319945

URL: http://llvm.org/viewvc/llvm-project?rev=319945&view=rev
Log:
[X86][AVX512] Tag mask reg op instruction scheduler classes

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/test/CodeGen/X86/avx512-schedule.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=319945&r1=319944&r2=319945&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Dec  6 11:36:00 2017
@@ -2621,15 +2621,16 @@ defm VFPCLASS : avx512_fp_fpclass_all<"v
 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
                          string OpcodeStr, RegisterClass KRC,
                          ValueType vvt, X86MemOperand x86memop> {
-  let hasSideEffects = 0 in
+  let hasSideEffects = 0, SchedRW = [WriteMove] in
   def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
-             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
+             !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
+             IIC_SSE_MOVDQ>;
   def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-             [(set KRC:$dst, (vvt (load addr:$src)))]>;
+             [(set KRC:$dst, (vvt (load addr:$src)))], IIC_SSE_MOVDQ>;
   def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-             [(store KRC:$src, addr:$dst)]>;
+             [(store KRC:$src, addr:$dst)], IIC_SSE_MOVDQ>;
 }
 
 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
@@ -2637,9 +2638,11 @@ multiclass avx512_mask_mov_gpr<bits<8> o
                              RegisterClass KRC, RegisterClass GRC> {
   let hasSideEffects = 0 in {
     def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
-               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
+               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
+               IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
     def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
-               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
+               !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
+               IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
   }
 }
 
@@ -2805,26 +2808,27 @@ let Predicates = [HasAVX512] in {
 // - KNOT
 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
                             RegisterClass KRC, SDPatternOperator OpNode,
-                            Predicate prd> {
+                            OpndItins itins, Predicate prd> {
   let Predicates = [prd] in
     def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
                !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-               [(set KRC:$dst, (OpNode KRC:$src))]>;
+               [(set KRC:$dst, (OpNode KRC:$src))], itins.rr>,
+               Sched<[itins.Sched]>;
 }
 
 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
-                                SDPatternOperator OpNode> {
+                                SDPatternOperator OpNode, OpndItins itins> {
   defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
-                            HasDQI>, VEX, PD;
+                            itins, HasDQI>, VEX, PD;
   defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
-                            HasAVX512>, VEX, PS;
+                            itins, HasAVX512>, VEX, PS;
   defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
-                            HasBWI>, VEX, PD, VEX_W;
+                            itins, HasBWI>, VEX, PD, VEX_W;
   defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
-                            HasBWI>, VEX, PS, VEX_W;
+                            itins, HasBWI>, VEX, PS, VEX_W;
 }
 
-defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot>;
+defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SSE_BIT_ITINS_P>;
 
 // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
 let Predicates = [HasAVX512, NoDQI] in
@@ -2840,25 +2844,26 @@ def : Pat<(vnot VK2:$src),
 // - KAND, KANDN, KOR, KXNOR, KXOR
 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
                            RegisterClass KRC, SDPatternOperator OpNode,
-                           Predicate prd, bit IsCommutable> {
+                           OpndItins itins, Predicate prd, bit IsCommutable> {
   let Predicates = [prd], isCommutable = IsCommutable in
     def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
                !strconcat(OpcodeStr,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>;
+               [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))], itins.rr>,
+               Sched<[itins.Sched]>;
 }
 
 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
-                               SDPatternOperator OpNode, bit IsCommutable,
-                               Predicate prdW = HasAVX512> {
+                               SDPatternOperator OpNode, OpndItins itins,
+                               bit IsCommutable, Predicate prdW = HasAVX512> {
   defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
-                             HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
+                             itins, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
   defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
-                             prdW, IsCommutable>, VEX_4V, VEX_L, PS;
+                             itins, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
   defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
-                             HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
+                             itins, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
   defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
-                             HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
+                             itins, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
 }
 
 def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
@@ -2867,12 +2872,12 @@ def xnor : PatFrag<(ops node:$i0, node:$
 def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
 def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
 
-defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,   1>;
-defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,    1>;
-defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, 1>;
-defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,   1>;
-defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, 0>;
-defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  add,   1, HasDQI>;
+defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,   SSE_BIT_ITINS_P, 1>;
+defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,    SSE_BIT_ITINS_P, 1>;
+defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SSE_BIT_ITINS_P, 1>;
+defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,   SSE_BIT_ITINS_P, 1>;
+defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SSE_BIT_ITINS_P, 0>;
+defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  add,   SSE_BIT_ITINS_P, 1, HasDQI>;
 
 multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
                             Instruction Inst> {
@@ -2907,13 +2912,13 @@ defm : avx512_binop_pat<xor,   xor,  KXO
 
 // Mask unpacking
 multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
-                             RegisterClass KRCSrc, Predicate prd> {
+                             RegisterClass KRCSrc, OpndItins itins, Predicate prd> {
   let Predicates = [prd] in {
     let hasSideEffects = 0 in
     def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
                (ins KRC:$src1, KRC:$src2),
-               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
-               VEX_4V, VEX_L;
+               "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
+               itins.rr>, VEX_4V, VEX_L, Sched<[itins.Sched]>;
 
     def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
               (!cast<Instruction>(NAME##rr)
@@ -2922,61 +2927,63 @@ multiclass avx512_mask_unpck<string Suff
   }
 }
 
-defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, HasAVX512>, PD;
-defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, HasBWI>, PS;
-defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, HasBWI>, PS, VEX_W;
+defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, SSE_UNPCK, HasAVX512>, PD;
+defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, SSE_UNPCK, HasBWI>, PS;
+defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, SSE_UNPCK, HasBWI>, PS, VEX_W;
 
 // Mask bit testing
 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
-                              SDNode OpNode, Predicate prd> {
+                              SDNode OpNode, OpndItins itins, Predicate prd> {
   let Predicates = [prd], Defs = [EFLAGS] in
     def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
                !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
-               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>;
+               [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))], itins.rr>,
+               Sched<[itins.Sched]>;
 }
 
 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                                Predicate prdW = HasAVX512> {
-  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, HasDQI>,
+                                OpndItins itins, Predicate prdW = HasAVX512> {
+  defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, itins, HasDQI>,
                                                                 VEX, PD;
-  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, prdW>,
+  defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, itins, prdW>,
                                                                 VEX, PS;
-  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, HasBWI>,
+  defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, itins, HasBWI>,
                                                                 VEX, PS, VEX_W;
-  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, HasBWI>,
+  defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, itins, HasBWI>,
                                                                 VEX, PD, VEX_W;
 }
 
-defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
-defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, HasDQI>;
+defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SSE_PTEST>;
+defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, SSE_PTEST, HasDQI>;
 
 // Mask shift
 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
-                             SDNode OpNode> {
+                             SDNode OpNode, OpndItins itins> {
   let Predicates = [HasAVX512] in
     def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
                  !strconcat(OpcodeStr,
                             "\t{$imm, $src, $dst|$dst, $src, $imm}"),
-                            [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>;
+                            [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))],
+                 itins.rr>, Sched<[itins.Sched]>;
 }
 
 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
-                               SDNode OpNode> {
-  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
-                               VEX, TAPD, VEX_W;
+                               SDNode OpNode, OpndItins itins> {
+  defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
+                               itins>, VEX, TAPD, VEX_W;
   let Predicates = [HasDQI] in
-  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode>,
-                               VEX, TAPD;
+  defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
+                               itins>, VEX, TAPD;
   let Predicates = [HasBWI] in {
-  defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode>,
-                               VEX, TAPD, VEX_W;
-  defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode>,
-                               VEX, TAPD;
+  defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
+                               itins>, VEX, TAPD, VEX_W;
+  defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
+                               itins>, VEX, TAPD;
   }
 }
 
-defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl>;
-defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr>;
+defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, SSE_PSHUF>;
+defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, SSE_PSHUF>;
 
 multiclass axv512_icmp_packed_no_vlx_lowering<SDNode OpNode, string InstStr> {
 def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
@@ -3023,7 +3030,8 @@ let Predicates = [HasAVX512, NoVLX] in {
 // Mask setting all 0s or 1s
 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
   let Predicates = [HasAVX512] in
-    let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1 in
+    let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
+        SchedRW = [WriteZero] in
       def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
                      [(set KRC:$dst, (VT Val))]>;
 }

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=319945&r1=319944&r2=319945&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed Dec  6 11:36:00 2017
@@ -6172,6 +6172,11 @@ let Predicates = [UseSSE41] in {
 // SSE4.1 - Packed Bit Test
 //===----------------------------------------------------------------------===//
 
+let Sched = WriteVecLogic in 
+def SSE_PTEST : OpndItins<
+  IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
+>;
+
 // ptest instruction we'll lower to this in X86ISelLowering primarily from
 // the intel intrinsic that corresponds to this.
 let Defs = [EFLAGS], Predicates = [HasAVX] in {

Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=319945&r1=319944&r2=319945&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Wed Dec  6 11:36:00 2017
@@ -1164,7 +1164,7 @@ define i32 @test3(float %a, float %b) {
 ; GENERIC-LABEL: test3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vcmpeqss %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; GENERIC-NEXT:    kmovd %k0, %eax
+; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    movzbl %al, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1719,7 +1719,7 @@ define <8 x double> @sito8f64(<8 x i32>
 define <8 x double> @i32to8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
 ; GENERIC-LABEL: i32to8f64_mask:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [4:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -1741,7 +1741,7 @@ define <8 x double> @i32to8f64_mask(<8 x
 define <8 x double> @sito8f64_maskz(<8 x i32> %a, i8 %b) nounwind {
 ; GENERIC-LABEL: sito8f64_maskz:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2193,7 +2193,7 @@ define <16 x float> @ulto16f32(<16 x i64
 define <8 x double> @uito8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
 ; GENERIC-LABEL: uito8f64_mask:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [4:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2215,7 +2215,7 @@ define <8 x double> @uito8f64_mask(<8 x
 define <8 x double> @uito8f64_maskz(<8 x i32> %a, i8 %b) nounwind {
 ; GENERIC-LABEL: uito8f64_maskz:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2855,7 +2855,7 @@ define <16 x double> @ubto16f64(<16 x i3
 ; GENERIC-NEXT:    movl {{.*}}(%rip), %eax # sched: [5:0.50]
 ; GENERIC-NEXT:    vpbroadcastd %eax, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT:    kshiftrw $8, %k1, %k1
+; GENERIC-NEXT:    kshiftrw $8, %k1, %k1 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpbroadcastd %eax, %ymm1 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4298,7 +4298,7 @@ define <8 x double> @fpext_test(<8 x flo
 define   <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
 ; GENERIC-LABEL: zext_16i1_to_16xi32:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4315,7 +4315,7 @@ define   <16 x i32> @zext_16i1_to_16xi32
 define   <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
 ; GENERIC-LABEL: zext_8i1_to_8xi64:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4334,7 +4334,7 @@ define i16 @trunc_16i8_to_16i1(<16 x i8>
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovb2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT:    kmovd %k0, %eax
+; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: %ax<def> %ax<kill> %eax<kill>
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4355,7 +4355,7 @@ define i16 @trunc_16i32_to_16i1(<16 x i3
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %zmm0, %zmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vptestmd %zmm0, %zmm0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT:    kmovd %k0, %eax
+; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: %ax<def> %ax<kill> %eax<kill>
 ; GENERIC-NEXT:    vzeroupper
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4404,7 +4404,7 @@ define i8 @trunc_8i16_to_8i1(<8 x i16> %
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT:    kmovd %k0, %eax
+; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: %al<def> %al<kill> %eax<kill>
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4443,13 +4443,13 @@ define i16 @trunc_i32_to_i1(i32 %a) {
 ; GENERIC-LABEL: trunc_i32_to_i1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    movw $-4, %ax # sched: [1:0.33]
-; GENERIC-NEXT:    kmovd %eax, %k0
-; GENERIC-NEXT:    kshiftrw $1, %k0, %k0
-; GENERIC-NEXT:    kshiftlw $1, %k0, %k0
+; GENERIC-NEXT:    kmovd %eax, %k0 # sched: [1:0.33]
+; GENERIC-NEXT:    kshiftrw $1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kshiftlw $1, %k0, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    andl $1, %edi # sched: [1:0.33]
-; GENERIC-NEXT:    kmovw %edi, %k1
-; GENERIC-NEXT:    korw %k1, %k0, %k0
-; GENERIC-NEXT:    kmovd %k0, %eax
+; GENERIC-NEXT:    kmovw %edi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT:    korw %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: %ax<def> %ax<kill> %eax<kill>
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4550,7 +4550,7 @@ define <64 x i16> @test21(<64 x i16> %x
 ; GENERIC-NEXT:    vpsllw $7, %zmm2, %zmm2 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpmovb2m %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
-; GENERIC-NEXT:    kshiftrq $32, %k1, %k1
+; GENERIC-NEXT:    kshiftrq $32, %k1, %k1 # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqu16 %zmm1, %zmm1 {%k1} {z} # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5357,7 +5357,7 @@ define <32 x i16> @xor_v32i16(<32 x i16>
 define <16 x float> @masked_and_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
 ; GENERIC-LABEL: masked_and_v16f32:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5382,7 +5382,7 @@ define <16 x float> @masked_and_v16f32(<
 define <16 x float> @masked_or_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
 ; GENERIC-LABEL: masked_or_v16f32:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5407,7 +5407,7 @@ define <16 x float> @masked_or_v16f32(<1
 define <16 x float> @masked_xor_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
 ; GENERIC-LABEL: masked_xor_v16f32:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5432,7 +5432,7 @@ define <16 x float> @masked_xor_v16f32(<
 define <8 x double> @masked_and_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
 ; GENERIC-LABEL: masked_and_v8f64:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5457,7 +5457,7 @@ define <8 x double> @masked_and_v8f64(<8
 define <8 x double> @masked_or_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
 ; GENERIC-LABEL: masked_or_v8f64:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5482,7 +5482,7 @@ define <8 x double> @masked_or_v8f64(<8
 define <8 x double> @masked_xor_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
 ; GENERIC-LABEL: masked_xor_v8f64:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5507,7 +5507,7 @@ define <8 x double> @masked_xor_v8f64(<8
 define <8 x i64> @test_mm512_mask_and_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
 ; GENERIC-LABEL: test_mm512_mask_and_epi32:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5529,7 +5529,7 @@ entry:
 define <8 x i64> @test_mm512_mask_or_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
 ; GENERIC-LABEL: test_mm512_mask_or_epi32:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5551,7 +5551,7 @@ entry:
 define <8 x i64> @test_mm512_mask_xor_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
 ; GENERIC-LABEL: test_mm512_mask_xor_epi32:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5573,7 +5573,7 @@ entry:
 define <8 x double> @test_mm512_mask_xor_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; GENERIC-LABEL: test_mm512_mask_xor_pd:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5595,7 +5595,7 @@ entry:
 define <8 x double> @test_mm512_maskz_xor_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; GENERIC-LABEL: test_mm512_maskz_xor_pd:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5617,7 +5617,7 @@ entry:
 define <16 x float> @test_mm512_mask_xor_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; GENERIC-LABEL: test_mm512_mask_xor_ps:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5639,7 +5639,7 @@ entry:
 define <16 x float> @test_mm512_maskz_xor_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; GENERIC-LABEL: test_mm512_maskz_xor_ps:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5661,7 +5661,7 @@ entry:
 define <8 x double> @test_mm512_mask_or_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; GENERIC-LABEL: test_mm512_mask_or_pd:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5683,7 +5683,7 @@ entry:
 define <8 x double> @test_mm512_maskz_or_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; GENERIC-LABEL: test_mm512_maskz_or_pd:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5705,7 +5705,7 @@ entry:
 define <16 x float> @test_mm512_mask_or_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; GENERIC-LABEL: test_mm512_mask_or_ps:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5727,7 +5727,7 @@ entry:
 define <16 x float> @test_mm512_maskz_or_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; GENERIC-LABEL: test_mm512_maskz_or_ps:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5749,7 +5749,7 @@ entry:
 define <8 x double> @test_mm512_mask_and_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; GENERIC-LABEL: test_mm512_mask_and_pd:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5771,7 +5771,7 @@ entry:
 define <8 x double> @test_mm512_maskz_and_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; GENERIC-LABEL: test_mm512_maskz_and_pd:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5793,7 +5793,7 @@ entry:
 define <16 x float> @test_mm512_mask_and_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; GENERIC-LABEL: test_mm512_mask_and_ps:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5815,7 +5815,7 @@ entry:
 define <16 x float> @test_mm512_maskz_and_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; GENERIC-LABEL: test_mm512_maskz_and_ps:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5837,7 +5837,7 @@ entry:
 define <8 x double> @test_mm512_mask_andnot_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; GENERIC-LABEL: test_mm512_mask_andnot_pd:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5860,7 +5860,7 @@ entry:
 define <8 x double> @test_mm512_maskz_andnot_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
 ; GENERIC-LABEL: test_mm512_maskz_andnot_pd:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5883,7 +5883,7 @@ entry:
 define <16 x float> @test_mm512_mask_andnot_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; GENERIC-LABEL: test_mm512_mask_andnot_ps:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5906,7 +5906,7 @@ entry:
 define <16 x float> @test_mm512_maskz_andnot_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; GENERIC-LABEL: test_mm512_maskz_andnot_ps:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    kmovd %edi, %k1
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6737,9 +6737,9 @@ define <8 x double> @mov_test47(i8 * %ad
 define i16 @mask16(i16 %x) {
 ; GENERIC-LABEL: mask16:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k0
-; GENERIC-NEXT:    knotw %k0, %k0
-; GENERIC-NEXT:    kmovd %k0, %eax
+; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT:    knotw %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: %ax<def> %ax<kill> %eax<kill>
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6759,9 +6759,9 @@ define i16 @mask16(i16 %x) {
 define i32 @mask16_zext(i16 %x) {
 ; GENERIC-LABEL: mask16_zext:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k0
-; GENERIC-NEXT:    knotw %k0, %k0
-; GENERIC-NEXT:    kmovw %k0, %eax
+; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT:    knotw %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kmovw %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: mask16_zext:
@@ -6780,9 +6780,9 @@ define i32 @mask16_zext(i16 %x) {
 define i8 @mask8(i8 %x) {
 ; GENERIC-LABEL: mask8:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k0
-; GENERIC-NEXT:    knotb %k0, %k0
-; GENERIC-NEXT:    kmovd %k0, %eax
+; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT:    knotb %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: %al<def> %al<kill> %eax<kill>
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6802,9 +6802,9 @@ define i8 @mask8(i8 %x) {
 define i32 @mask8_zext(i8 %x) {
 ; GENERIC-LABEL: mask8_zext:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k0
-; GENERIC-NEXT:    knotb %k0, %k0
-; GENERIC-NEXT:    kmovb %k0, %eax
+; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT:    knotb %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kmovb %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: mask8_zext:
@@ -6824,7 +6824,7 @@ define void @mask16_mem(i16* %ptr) {
 ; GENERIC-LABEL: mask16_mem:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    kmovw (%rdi), %k0
-; GENERIC-NEXT:    knotw %k0, %k0
+; GENERIC-NEXT:    knotw %k0, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    kmovw %k0, (%rdi)
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6846,7 +6846,7 @@ define void @mask8_mem(i8* %ptr) {
 ; GENERIC-LABEL: mask8_mem:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    kmovb (%rdi), %k0
-; GENERIC-NEXT:    knotb %k0, %k0
+; GENERIC-NEXT:    knotb %k0, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    kmovb %k0, (%rdi)
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6896,10 +6896,10 @@ define i16 @mand16_mem(<16 x i1>* %x, <1
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    kmovw (%rdi), %k0
 ; GENERIC-NEXT:    kmovw (%rsi), %k1
-; GENERIC-NEXT:    kandw %k1, %k0, %k2
-; GENERIC-NEXT:    kxorw %k1, %k0, %k0
-; GENERIC-NEXT:    korw %k0, %k2, %k0
-; GENERIC-NEXT:    kmovd %k0, %eax
+; GENERIC-NEXT:    kandw %k1, %k0, %k2 # sched: [1:1.00]
+; GENERIC-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    korw %k0, %k2, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: %ax<def> %ax<kill> %eax<kill>
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6925,9 +6925,9 @@ define i16 @mand16_mem(<16 x i1>* %x, <1
 define i8 @shuf_test1(i16 %v) nounwind {
 ; GENERIC-LABEL: shuf_test1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k0
-; GENERIC-NEXT:    kshiftrw $8, %k0, %k0
-; GENERIC-NEXT:    kmovd %k0, %eax
+; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT:    kshiftrw $8, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: %al<def> %al<kill> %eax<kill>
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6948,9 +6948,9 @@ define i32 @zext_test1(<16 x i32> %a, <1
 ; GENERIC-LABEL: zext_test1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
-; GENERIC-NEXT:    kshiftlw $10, %k0, %k0
-; GENERIC-NEXT:    kshiftrw $15, %k0, %k0
-; GENERIC-NEXT:    kmovd %k0, %eax
+; GENERIC-NEXT:    kshiftlw $10, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kshiftrw $15, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    andl $1, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    vzeroupper
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -6974,9 +6974,9 @@ define i16 @zext_test2(<16 x i32> %a, <1
 ; GENERIC-LABEL: zext_test2:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
-; GENERIC-NEXT:    kshiftlw $10, %k0, %k0
-; GENERIC-NEXT:    kshiftrw $15, %k0, %k0
-; GENERIC-NEXT:    kmovd %k0, %eax
+; GENERIC-NEXT:    kshiftlw $10, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kshiftrw $15, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    andl $1, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: %ax<def> %ax<kill> %eax<kill>
 ; GENERIC-NEXT:    vzeroupper
@@ -7002,9 +7002,9 @@ define i8 @zext_test3(<16 x i32> %a, <16
 ; GENERIC-LABEL: zext_test3:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
-; GENERIC-NEXT:    kshiftlw $10, %k0, %k0
-; GENERIC-NEXT:    kshiftrw $15, %k0, %k0
-; GENERIC-NEXT:    kmovd %k0, %eax
+; GENERIC-NEXT:    kshiftlw $10, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kshiftrw $15, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    andb $1, %al # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: %al<def> %al<kill> %eax<kill>
 ; GENERIC-NEXT:    vzeroupper
@@ -7029,7 +7029,7 @@ define i8 @zext_test3(<16 x i32> %a, <16
 define i8 @conv1(<8 x i1>* %R) {
 ; GENERIC-LABEL: conv1:
 ; GENERIC:       # %bb.0: # %entry
-; GENERIC-NEXT:    kxnorw %k0, %k0, %k0
+; GENERIC-NEXT:    kxnorw %k0, %k0, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    kmovb %k0, (%rdi)
 ; GENERIC-NEXT:    movb $-2, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
 ; GENERIC-NEXT:    movb $-2, %al # sched: [1:0.33]
@@ -7057,7 +7057,7 @@ define <4 x i32> @test4(<4 x i64> %x, <4
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpcmpgtq %ymm3, %ymm2, %k1 # sched: [3:1.00]
-; GENERIC-NEXT:    kandnw %k0, %k1, %k0
+; GENERIC-NEXT:    kandnw %k0, %k1, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vzeroupper
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7082,7 +7082,7 @@ define <2 x i64> @vcmp_test5(<2 x i64> %
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpcmpgtq %xmm3, %xmm2, %k1 # sched: [3:1.00]
-; GENERIC-NEXT:    kandnw %k1, %k0, %k0
+; GENERIC-NEXT:    kandnw %k1, %k0, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovm2q %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7117,9 +7117,9 @@ define void @vcmp_test7(<8 x i1> %mask)
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    movb $85, %al # sched: [1:0.33]
-; GENERIC-NEXT:    kmovd %eax, %k1
-; GENERIC-NEXT:    korb %k1, %k0, %k0
-; GENERIC-NEXT:    ktestb %k0, %k0
+; GENERIC-NEXT:    kmovd %eax, %k1 # sched: [1:0.33]
+; GENERIC-NEXT:    korb %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    ktestb %k0, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vcmp_test7:
@@ -7297,7 +7297,7 @@ define <16 x i1> @vmov_test15(i32 %x, i3
 ; GENERIC-NEXT:    # sched: [1:0.33]
 ; GENERIC-NEXT:    movw $1, %cx # sched: [1:0.33]
 ; GENERIC-NEXT:    cmovgw %ax, %cx # sched: [2:0.67]
-; GENERIC-NEXT:    kmovd %ecx, %k0
+; GENERIC-NEXT:    kmovd %ecx, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7322,14 +7322,14 @@ define <64 x i8> @vmov_test16(i64 %x) {
 ;
 ; GENERIC-LABEL: vmov_test16:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovq %rdi, %k0
+; GENERIC-NEXT:    kmovq %rdi, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    movb $1, %al # sched: [1:0.33]
-; GENERIC-NEXT:    kmovd %eax, %k1
+; GENERIC-NEXT:    kmovd %eax, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovm2b %k1, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpsllq $40, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovm2b %k0, %zmm1 # sched: [1:0.33]
 ; GENERIC-NEXT:    movl $32, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    kmovd %eax, %k1
+; GENERIC-NEXT:    kmovd %eax, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [2:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovb2m %zmm0, %k0 # sched: [1:0.33]
@@ -7361,15 +7361,15 @@ define <64 x i8> @vmov_test17(i64 %x, i3
 ;
 ; GENERIC-LABEL: vmov_test17:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovq %rdi, %k0
+; GENERIC-NEXT:    kmovq %rdi, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    cmpl %edx, %esi # sched: [1:0.33]
 ; GENERIC-NEXT:    setg %al # sched: [1:0.50]
-; GENERIC-NEXT:    kmovd %eax, %k1
+; GENERIC-NEXT:    kmovd %eax, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovm2b %k1, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpsllq $40, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovm2b %k0, %zmm1 # sched: [1:0.33]
 ; GENERIC-NEXT:    movl $32, %eax # sched: [1:0.33]
-; GENERIC-NEXT:    kmovd %eax, %k1
+; GENERIC-NEXT:    kmovd %eax, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [2:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovb2m %zmm0, %k0 # sched: [1:0.33]
@@ -7402,21 +7402,21 @@ define <64 x i8> @vmov_test17(i64 %x, i3
 define <8 x i1> @vmov_test18(i8 %a, i16 %y) {
 ; GENERIC-LABEL: vmov_test18:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k1
-; GENERIC-NEXT:    kmovd %esi, %k2
-; GENERIC-NEXT:    kshiftlw $7, %k2, %k0
-; GENERIC-NEXT:    kshiftrw $15, %k0, %k0
-; GENERIC-NEXT:    kshiftlw $6, %k2, %k2
-; GENERIC-NEXT:    kshiftrw $15, %k2, %k2
+; GENERIC-NEXT:    kmovd %edi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT:    kmovd %esi, %k2 # sched: [1:0.33]
+; GENERIC-NEXT:    kshiftlw $7, %k2, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kshiftrw $15, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kshiftlw $6, %k2, %k2 # sched: [1:1.00]
+; GENERIC-NEXT:    kshiftrw $15, %k2, %k2 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovm2q %k1, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovm2q %k2, %zmm1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7] sched: [4:0.50]
 ; GENERIC-NEXT:    vpermi2q %zmm1, %zmm0, %zmm2 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovq2m %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT:    kshiftlb $1, %k1, %k1
-; GENERIC-NEXT:    kshiftrb $1, %k1, %k1
-; GENERIC-NEXT:    kshiftlb $7, %k0, %k0
-; GENERIC-NEXT:    korb %k0, %k1, %k0
+; GENERIC-NEXT:    kshiftlb $1, %k1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    kshiftrb $1, %k1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    kshiftlb $7, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    korb %k0, %k1, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovm2w %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vzeroupper
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7506,9 +7506,9 @@ define void @vmov_test23(<2 x i1> %a, <2
 define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
 ; GENERIC-LABEL: store_v1i1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k0
-; GENERIC-NEXT:    kxnorw %k0, %k0, %k1
-; GENERIC-NEXT:    kxorw %k1, %k0, %k0
+; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT:    kxnorw %k0, %k0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    kmovb %k0, (%rsi)
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7529,7 +7529,7 @@ define void @store_v2i1(<2 x i1> %c , <2
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vptestmq %xmm0, %xmm0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT:    knotw %k0, %k0
+; GENERIC-NEXT:    knotw %k0, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    kmovb %k0, (%rdi)
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7550,7 +7550,7 @@ define void @store_v4i1(<4 x i1> %c , <4
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vptestmd %xmm0, %xmm0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT:    knotw %k0, %k0
+; GENERIC-NEXT:    knotw %k0, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    kmovb %k0, (%rdi)
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7571,7 +7571,7 @@ define void @store_v8i1(<8 x i1> %c , <8
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT:    knotb %k0, %k0
+; GENERIC-NEXT:    knotb %k0, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    kmovb %k0, (%rdi)
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7592,7 +7592,7 @@ define void @store_v16i1(<16 x i1> %c ,
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpmovb2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT:    knotw %k0, %k0
+; GENERIC-NEXT:    knotw %k0, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    kmovw %k0, (%rdi)
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7685,7 +7685,7 @@ define <32 x i16> @test_build_vec_v32i1(
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    movl $1497715861, %eax # imm = 0x59455495
 ; GENERIC-NEXT:    # sched: [1:0.33]
-; GENERIC-NEXT:    kmovd %eax, %k1
+; GENERIC-NEXT:    kmovd %eax, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7721,7 +7721,7 @@ define void @ktest_1(<8 x double> %in, d
 ; GENERIC-NEXT:    vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [4:0.50]
 ; GENERIC-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00]
-; GENERIC-NEXT:    ktestb %k0, %k0
+; GENERIC-NEXT:    ktestb %k0, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    je .LBB410_2 # sched: [1:1.00]
 ; GENERIC-NEXT:  # %bb.1: # %L1
 ; GENERIC-NEXT:    vmovapd %zmm0, (%rdi) # sched: [1:1.00]
@@ -7783,14 +7783,14 @@ define void @ktest_2(<32 x float> %in, f
 ; GENERIC-NEXT:    vmovups 64(%rdi), %zmm3 # sched: [4:0.50]
 ; GENERIC-NEXT:    vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00]
-; GENERIC-NEXT:    kunpckwd %k1, %k2, %k0
+; GENERIC-NEXT:    kunpckwd %k1, %k2, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [4:0.50]
 ; GENERIC-NEXT:    vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [4:0.50]
 ; GENERIC-NEXT:    vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00]
-; GENERIC-NEXT:    kunpckwd %k1, %k2, %k1
-; GENERIC-NEXT:    kord %k1, %k0, %k0
-; GENERIC-NEXT:    ktestd %k0, %k0
+; GENERIC-NEXT:    kunpckwd %k1, %k2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT:    kord %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    ktestd %k0, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT:    je .LBB411_2 # sched: [1:1.00]
 ; GENERIC-NEXT:  # %bb.1: # %L1
 ; GENERIC-NEXT:    vmovaps %zmm0, (%rdi) # sched: [1:1.00]
@@ -8080,7 +8080,7 @@ define i32 @test_bitcast_v8i1_zext(<16 x
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 # sched: [3:1.00]
-; GENERIC-NEXT:    kmovb %k0, %eax
+; GENERIC-NEXT:    kmovb %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    addl %eax, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    vzeroupper
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8106,7 +8106,7 @@ define i32 @test_bitcast_v16i1_zext(<16
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 # sched: [3:1.00]
-; GENERIC-NEXT:    kmovw %k0, %eax
+; GENERIC-NEXT:    kmovw %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    addl %eax, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    vzeroupper
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8129,10 +8129,10 @@ define i32 @test_bitcast_v16i1_zext(<16
 define i16 @test_v16i1_add(i16 %x, i16 %y) {
 ; GENERIC-LABEL: test_v16i1_add:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k0
-; GENERIC-NEXT:    kmovd %esi, %k1
-; GENERIC-NEXT:    kxorw %k1, %k0, %k0
-; GENERIC-NEXT:    kmovd %k0, %eax
+; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT:    kmovd %esi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: %ax<def> %ax<kill> %eax<kill>
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8154,10 +8154,10 @@ define i16 @test_v16i1_add(i16 %x, i16 %
 define i16 @test_v16i1_sub(i16 %x, i16 %y) {
 ; GENERIC-LABEL: test_v16i1_sub:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k0
-; GENERIC-NEXT:    kmovd %esi, %k1
-; GENERIC-NEXT:    kxorw %k1, %k0, %k0
-; GENERIC-NEXT:    kmovd %k0, %eax
+; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT:    kmovd %esi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT:    kxorw %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: %ax<def> %ax<kill> %eax<kill>
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8179,10 +8179,10 @@ define i16 @test_v16i1_sub(i16 %x, i16 %
 define i16 @test_v16i1_mul(i16 %x, i16 %y) {
 ; GENERIC-LABEL: test_v16i1_mul:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k0
-; GENERIC-NEXT:    kmovd %esi, %k1
-; GENERIC-NEXT:    kandw %k1, %k0, %k0
-; GENERIC-NEXT:    kmovd %k0, %eax
+; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT:    kmovd %esi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT:    kandw %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: %ax<def> %ax<kill> %eax<kill>
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8204,10 +8204,10 @@ define i16 @test_v16i1_mul(i16 %x, i16 %
 define i8 @test_v8i1_add(i8 %x, i8 %y) {
 ; GENERIC-LABEL: test_v8i1_add:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k0
-; GENERIC-NEXT:    kmovd %esi, %k1
-; GENERIC-NEXT:    kxorb %k1, %k0, %k0
-; GENERIC-NEXT:    kmovd %k0, %eax
+; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT:    kmovd %esi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT:    kxorb %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: %al<def> %al<kill> %eax<kill>
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8229,10 +8229,10 @@ define i8 @test_v8i1_add(i8 %x, i8 %y) {
 define i8 @test_v8i1_sub(i8 %x, i8 %y) {
 ; GENERIC-LABEL: test_v8i1_sub:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k0
-; GENERIC-NEXT:    kmovd %esi, %k1
-; GENERIC-NEXT:    kxorb %k1, %k0, %k0
-; GENERIC-NEXT:    kmovd %k0, %eax
+; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT:    kmovd %esi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT:    kxorb %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: %al<def> %al<kill> %eax<kill>
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8254,10 +8254,10 @@ define i8 @test_v8i1_sub(i8 %x, i8 %y) {
 define i8 @test_v8i1_mul(i8 %x, i8 %y) {
 ; GENERIC-LABEL: test_v8i1_mul:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    kmovd %edi, %k0
-; GENERIC-NEXT:    kmovd %esi, %k1
-; GENERIC-NEXT:    kandb %k1, %k0, %k0
-; GENERIC-NEXT:    kmovd %k0, %eax
+; GENERIC-NEXT:    kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT:    kmovd %esi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT:    kandb %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT:    kmovd %k0, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    # kill: %al<def> %al<kill> %eax<kill>
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8592,7 +8592,7 @@ define <16 x i32> @test_vbroadcast() {
 ; GENERIC-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT:    knotw %k0, %k1
+; GENERIC-NEXT:    knotw %k0, %k1 # sched: [1:1.00]
 ; GENERIC-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;




More information about the llvm-commits mailing list