[llvm] r319760 - [X86][AVX512] Tag VPCMP/VPCMPU instruction scheduler classes

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 5 04:14:37 PST 2017


Author: rksimon
Date: Tue Dec  5 04:14:36 2017
New Revision: 319760

URL: http://llvm.org/viewvc/llvm-project?rev=319760&view=rev
Log:
[X86][AVX512] Tag VPCMP/VPCMPU instruction scheduler classes

Move hardcoded itinerary out to the instruction declarations. Not sure that IIC_SSE_ALU_F32P is the best schedule for integer comparisons, but I'm not going to change it right now.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/avx512-schedule.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=319760&r1=319759&r2=319760&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue Dec  5 04:14:36 2017
@@ -2092,7 +2092,7 @@ def CommutePCMPCC : SDNodeXForm<imm, [{
 }]>;
 
 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
-                          X86VectorVTInfo _> {
+                          OpndItins itins, X86VectorVTInfo _> {
   let isCommutable = 1 in
   def rri : AVX512AIi8<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
@@ -2100,7 +2100,7 @@ multiclass avx512_icmp_cc<bits<8> opc, s
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                        imm:$cc))],
-             IIC_SSE_ALU_F32P_RR>, EVEX_4V;
+             itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
   def rmi : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
              !strconcat("vpcmp${cc}", Suffix,
@@ -2108,7 +2108,7 @@ multiclass avx512_icmp_cc<bits<8> opc, s
              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                               (_.VT (bitconvert (_.LdFrag addr:$src2))),
                               imm:$cc))],
-             IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+             itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
   let isCommutable = 1 in
   def rrik : AVX512AIi8<opc, MRMSrcReg,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
@@ -2119,7 +2119,7 @@ multiclass avx512_icmp_cc<bits<8> opc, s
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                   (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                           imm:$cc)))],
-              IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
+              itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
   def rmik : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                     AVX512ICC:$cc),
@@ -2130,7 +2130,8 @@ multiclass avx512_icmp_cc<bits<8> opc, s
                                    (OpNode (_.VT _.RC:$src1),
                                       (_.VT (bitconvert (_.LdFrag addr:$src2))),
                                       imm:$cc)))],
-              IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
+              itins.rm>, EVEX_4V, EVEX_K,
+              Sched<[itins.Sched.Folded, ReadAfterLd]>;
 
   // Accept explicit immediate argument form instead of comparison code.
   let isAsmParserOnly = 1, hasSideEffects = 0 in {
@@ -2138,20 +2139,20 @@ multiclass avx512_icmp_cc<bits<8> opc, s
                (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
                           "$dst, $src1, $src2, $cc}"),
-               [], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
+               [], itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
     let mayLoad = 1 in
     def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
                (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
                           "$dst, $src1, $src2, $cc}"),
-               [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+               [], itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
     def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                        u8imm:$cc),
                !strconcat("vpcmp", Suffix,
                           "\t{$cc, $src2, $src1, $dst {${mask}}|",
                           "$dst {${mask}}, $src1, $src2, $cc}"),
-               [], IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
+               [], itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
     let mayLoad = 1 in
     def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
@@ -2159,7 +2160,8 @@ multiclass avx512_icmp_cc<bits<8> opc, s
                !strconcat("vpcmp", Suffix,
                           "\t{$cc, $src2, $src1, $dst {${mask}}|",
                           "$dst {${mask}}, $src1, $src2, $cc}"),
-               [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
+               [], itins.rm>, EVEX_4V, EVEX_K,
+               Sched<[itins.Sched.Folded, ReadAfterLd]>;
   }
 
   def : Pat<(OpNode (bitconvert (_.LdFrag addr:$src2)),
@@ -2175,8 +2177,8 @@ multiclass avx512_icmp_cc<bits<8> opc, s
 }
 
 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
-                              X86VectorVTInfo _> :
-           avx512_icmp_cc<opc, Suffix, OpNode, _> {
+                              OpndItins itins, X86VectorVTInfo _> :
+           avx512_icmp_cc<opc, Suffix, OpNode, itins, _> {
   def rmib : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                      AVX512ICC:$cc),
@@ -2186,7 +2188,8 @@ multiclass avx512_icmp_cc_rmb<bits<8> op
              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                                (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                                imm:$cc))],
-             IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
+             itins.rm>, EVEX_4V, EVEX_B,
+             Sched<[itins.Sched.Folded, ReadAfterLd]>;
   def rmibk : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                        _.ScalarMemOp:$src2, AVX512ICC:$cc),
@@ -2197,7 +2200,8 @@ multiclass avx512_icmp_cc_rmb<bits<8> op
                                   (OpNode (_.VT _.RC:$src1),
                                     (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
                                     imm:$cc)))],
-              IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
+              itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
+              Sched<[itins.Sched.Folded, ReadAfterLd]>;
 
   // Accept explicit immediate argument form instead of comparison code.
   let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
@@ -2207,14 +2211,16 @@ multiclass avx512_icmp_cc_rmb<bits<8> op
                !strconcat("vpcmp", Suffix,
                    "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                    "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
-               [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
+               [], itins.rm>, EVEX_4V, EVEX_B,
+               Sched<[itins.Sched.Folded, ReadAfterLd]>;
     def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
                (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                        _.ScalarMemOp:$src2, u8imm:$cc),
                !strconcat("vpcmp", Suffix,
                   "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                   "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
-               [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
+               [], itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
+               Sched<[itins.Sched.Folded, ReadAfterLd]>;
   }
 
   def : Pat<(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
@@ -2231,49 +2237,61 @@ multiclass avx512_icmp_cc_rmb<bits<8> op
 }
 
 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
-                             AVX512VLVectorVTInfo VTInfo, Predicate prd> {
+                             OpndItins itins, AVX512VLVectorVTInfo VTInfo,
+                             Predicate prd> {
   let Predicates = [prd] in
-  defm Z : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info512>, EVEX_V512;
+  defm Z : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info512>,
+                          EVEX_V512;
 
   let Predicates = [prd, HasVLX] in {
-    defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info256>, EVEX_V256;
-    defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info128>, EVEX_V128;
+    defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info256>,
+                               EVEX_V256;
+    defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info128>,
+                               EVEX_V128;
   }
 }
 
 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
-                                AVX512VLVectorVTInfo VTInfo, Predicate prd> {
+                                 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
+                                 Predicate prd> {
   let Predicates = [prd] in
-  defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info512>,
+  defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info512>,
            EVEX_V512;
 
   let Predicates = [prd, HasVLX] in {
-    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info256>,
+    defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info256>,
                 EVEX_V256;
-    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info128>,
+    defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info128>,
                 EVEX_V128;
   }
 }
 
-defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, avx512vl_i8_info,
-                                HasBWI>, EVEX_CD8<8, CD8VF>;
-defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, avx512vl_i8_info,
-                                 HasBWI>, EVEX_CD8<8, CD8VF>;
-
-defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, avx512vl_i16_info,
-                                HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>;
-defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, avx512vl_i16_info,
-                                 HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>;
-
-defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, avx512vl_i32_info,
-                                    HasAVX512>, EVEX_CD8<32, CD8VF>;
-defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, avx512vl_i32_info,
-                                     HasAVX512>, EVEX_CD8<32, CD8VF>;
-
-defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info,
-                                    HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
-                                     HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
+// FIXME: Is there a better scheduler itinerary for VPCMP/VPCMPU?
+defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, SSE_ALU_F32P,
+                                avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>;
+defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, SSE_ALU_F32P,
+                                 avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>;
+
+defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, SSE_ALU_F32P,
+                                avx512vl_i16_info, HasBWI>,
+                                VEX_W, EVEX_CD8<16, CD8VF>;
+defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, SSE_ALU_F32P,
+                                 avx512vl_i16_info, HasBWI>,
+                                 VEX_W, EVEX_CD8<16, CD8VF>;
+
+defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, SSE_ALU_F32P,
+                                    avx512vl_i32_info, HasAVX512>,
+                                    EVEX_CD8<32, CD8VF>;
+defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, SSE_ALU_F32P,
+                                     avx512vl_i32_info, HasAVX512>,
+                                     EVEX_CD8<32, CD8VF>;
+
+defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, SSE_ALU_F32P,
+                                    avx512vl_i64_info, HasAVX512>,
+                                    VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, SSE_ALU_F32P,
+                                     avx512vl_i64_info, HasAVX512>,
+                                     VEX_W, EVEX_CD8<64, CD8VF>;
 
 
 multiclass avx512_vcmp_common<OpndItins itins, X86VectorVTInfo _> {

Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=319760&r1=319759&r2=319760&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Tue Dec  5 04:14:36 2017
@@ -401,7 +401,7 @@ define <16 x i32> @vpaddd_mask_test(<16
 ; GENERIC-LABEL: vpaddd_mask_test:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -421,7 +421,7 @@ define <16 x i32> @vpaddd_maskz_test(<16
 ; GENERIC-LABEL: vpaddd_maskz_test:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -441,7 +441,7 @@ define <16 x i32> @vpaddd_mask_fold_test
 ; GENERIC-LABEL: vpaddd_mask_fold_test:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -462,7 +462,7 @@ define <16 x i32> @vpaddd_mask_broadcast
 ; GENERIC-LABEL: vpaddd_mask_broadcast_test:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -482,7 +482,7 @@ define <16 x i32> @vpaddd_maskz_fold_tes
 ; GENERIC-LABEL: vpaddd_maskz_fold_test:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -503,7 +503,7 @@ define <16 x i32> @vpaddd_maskz_broadcas
 ; GENERIC-LABEL: vpaddd_maskz_broadcast_test:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -718,7 +718,7 @@ define <16 x float> @test_mask_vaddps(<1
 ; GENERIC-LABEL: test_mask_vaddps:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -740,7 +740,7 @@ define <16 x float> @test_mask_vmulps(<1
 ; GENERIC-LABEL: test_mask_vmulps:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -760,7 +760,7 @@ define <16 x float> @test_mask_vminps(<1
 ; GENERIC-LABEL: test_mask_vminps:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -781,7 +781,7 @@ define <8 x double> @test_mask_vminpd(<8
 ; GENERIC-LABEL: test_mask_vminpd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpneqd %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -802,7 +802,7 @@ define <16 x float> @test_mask_vmaxps(<1
 ; GENERIC-LABEL: test_mask_vmaxps:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -823,7 +823,7 @@ define <8 x double> @test_mask_vmaxpd(<8
 ; GENERIC-LABEL: test_mask_vmaxpd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %ymm4, %ymm3, %k1
+; GENERIC-NEXT:    vpcmpneqd %ymm4, %ymm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -844,7 +844,7 @@ define <16 x float> @test_mask_vsubps(<1
 ; GENERIC-LABEL: test_mask_vsubps:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -864,7 +864,7 @@ define <16 x float> @test_mask_vdivps(<1
 ; GENERIC-LABEL: test_mask_vdivps:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpneqd %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [24:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -884,7 +884,7 @@ define <8 x double> @test_mask_vaddpd(<8
 ; GENERIC-LABEL: test_mask_vaddpd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqq %zmm4, %zmm3, %k1
+; GENERIC-NEXT:    vpcmpneqq %zmm4, %zmm3, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -904,7 +904,7 @@ define <8 x double> @test_maskz_vaddpd(<
 ; GENERIC-LABEL: test_maskz_vaddpd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpneqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -924,7 +924,7 @@ define <8 x double> @test_mask_fold_vadd
 ; GENERIC-LABEL: test_mask_fold_vaddpd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqq %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpneqq %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -945,7 +945,7 @@ define <8 x double> @test_maskz_fold_vad
 ; GENERIC-LABEL: test_maskz_fold_vaddpd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -984,7 +984,7 @@ define <8 x double> @test_mask_broadcast
 ; GENERIC-LABEL: test_mask_broadcast_vaddpd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqq %zmm0, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpneqq %zmm0, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [7:1.00]
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -1010,7 +1010,7 @@ define <8 x double> @test_maskz_broadcas
 ; GENERIC-LABEL: test_maskz_broadcast_vaddpd:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [7:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2965,7 +2965,7 @@ define <2 x float> @ubto2f32(<2 x i32> %
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
-; GENERIC-NEXT:    vpcmpltuq %xmm1, %xmm0, %k1
+; GENERIC-NEXT:    vpcmpltuq %xmm1, %xmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
 ; GENERIC-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -2988,7 +2988,7 @@ define <2 x double> @ubto2f64(<2 x i32>
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
-; GENERIC-NEXT:    vpcmpltuq %xmm1, %xmm0, %k1
+; GENERIC-NEXT:    vpcmpltuq %xmm1, %xmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [4:0.50]
 ; GENERIC-NEXT:    vcvtqq2pd %xmm0, %xmm0 # sched: [4:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4423,7 +4423,7 @@ define i8 @trunc_8i16_to_8i1(<8 x i16> %
 define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
 ; GENERIC-LABEL: sext_8i1_8i32:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpcmpled %ymm0, %ymm1, %k0
+; GENERIC-NEXT:    vpcmpled %ymm0, %ymm1, %k0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpmovm2d %k0, %ymm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6402,7 +6402,7 @@ define <16 x i32> @mov_test32(i8 * %addr
 ; GENERIC-LABEL: mov_test32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vmovdqa32 (%rdi), %zmm0 {%k1} # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6423,7 +6423,7 @@ define <16 x i32> @mov_test33(i8 * %addr
 ; GENERIC-LABEL: mov_test33:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6444,7 +6444,7 @@ define <16 x i32> @mov_test34(i8 * %addr
 ; GENERIC-LABEL: mov_test34:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpneqd %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vmovdqa32 (%rdi), %zmm0 {%k1} {z} # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6465,7 +6465,7 @@ define <16 x i32> @mov_test35(i8 * %addr
 ; GENERIC-LABEL: mov_test35:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpneqd %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} {z} # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6486,7 +6486,7 @@ define <8 x i64> @mov_test36(i8 * %addr,
 ; GENERIC-LABEL: mov_test36:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm0 {%k1} # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6507,7 +6507,7 @@ define <8 x i64> @mov_test37(i8 * %addr,
 ; GENERIC-LABEL: mov_test37:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k1} # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6528,7 +6528,7 @@ define <8 x i64> @mov_test38(i8 * %addr,
 ; GENERIC-LABEL: mov_test38:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqq %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpneqq %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vmovdqa64 (%rdi), %zmm0 {%k1} {z} # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6549,7 +6549,7 @@ define <8 x i64> @mov_test39(i8 * %addr,
 ; GENERIC-LABEL: mov_test39:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqq %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpneqq %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k1} {z} # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -6947,7 +6947,7 @@ define i8 @shuf_test1(i16 %v) nounwind {
 define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
 ; GENERIC-LABEL: zext_test1:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
+; GENERIC-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
 ; GENERIC-NEXT:    kshiftlw $10, %k0, %k0
 ; GENERIC-NEXT:    kshiftrw $15, %k0, %k0
 ; GENERIC-NEXT:    kmovd %k0, %eax
@@ -6973,7 +6973,7 @@ define i32 @zext_test1(<16 x i32> %a, <1
 define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
 ; GENERIC-LABEL: zext_test2:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
+; GENERIC-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
 ; GENERIC-NEXT:    kshiftlw $10, %k0, %k0
 ; GENERIC-NEXT:    kshiftrw $15, %k0, %k0
 ; GENERIC-NEXT:    kmovd %k0, %eax
@@ -7001,7 +7001,7 @@ define i16 @zext_test2(<16 x i32> %a, <1
 define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
 ; GENERIC-LABEL: zext_test3:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
+; GENERIC-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
 ; GENERIC-NEXT:    kshiftlw $10, %k0, %k0
 ; GENERIC-NEXT:    kshiftrw $15, %k0, %k0
 ; GENERIC-NEXT:    kmovd %k0, %eax
@@ -7150,7 +7150,7 @@ define <16 x i8> @vcmp_test8(<16 x i32>%
 ; GENERIC-NEXT:    cmpl %esi, %edi # sched: [1:0.33]
 ; GENERIC-NEXT:    jg .LBB386_1 # sched: [1:1.00]
 ; GENERIC-NEXT:  # %bb.2:
-; GENERIC-NEXT:    vpcmpltud %zmm2, %zmm1, %k0
+; GENERIC-NEXT:    vpcmpltud %zmm2, %zmm1, %k0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpmovm2b %k0, %xmm0
 ; GENERIC-NEXT:    vzeroupper
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8339,7 +8339,7 @@ define   <16 x float> @_ss16xfloat_mask(
 ; GENERIC-LABEL: _ss16xfloat_mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
+; GENERIC-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1}
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8362,7 +8362,7 @@ define   <16 x float> @_ss16xfloat_maskz
 ; GENERIC-LABEL: _ss16xfloat_maskz:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z}
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8399,7 +8399,7 @@ define   <16 x float> @_ss16xfloat_mask_
 ; GENERIC-LABEL: _ss16xfloat_mask_load:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
+; GENERIC-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1}
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8421,7 +8421,7 @@ define   <16 x float> @_ss16xfloat_maskz
 ; GENERIC-LABEL: _ss16xfloat_maskz_load:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %zmm1, %zmm0, %k1
+; GENERIC-NEXT:    vpcmpneqd %zmm1, %zmm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1} {z}
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8458,7 +8458,7 @@ define   <8 x double> @_sd8xdouble_mask(
 ; GENERIC-LABEL: _sd8xdouble_mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %ymm3, %ymm2, %k1
+; GENERIC-NEXT:    vpcmpneqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1}
 ; GENERIC-NEXT:    vmovapd %zmm1, %zmm0
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8481,7 +8481,7 @@ define   <8 x double> @_sd8xdouble_maskz
 ; GENERIC-LABEL: _sd8xdouble_maskz:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpneqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z}
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8518,7 +8518,7 @@ define   <8 x double> @_sd8xdouble_mask_
 ; GENERIC-LABEL: _sd8xdouble_mask_load:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %ymm2, %ymm1, %k1
+; GENERIC-NEXT:    vpcmpneqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1}
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8540,7 +8540,7 @@ define   <8 x double> @_sd8xdouble_maskz
 ; GENERIC-LABEL: _sd8xdouble_maskz_load:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT:    vpcmpneqd %ymm1, %ymm0, %k1
+; GENERIC-NEXT:    vpcmpneqd %ymm1, %ymm0, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} {z}
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;




More information about the llvm-commits mailing list