[llvm] r319895 - [X86][AVX512] Regenerate vpmovm2*/vpmov*2m avx512 schedule tests

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 6 06:07:38 PST 2017


Author: rksimon
Date: Wed Dec  6 06:07:38 2017
New Revision: 319895

URL: http://llvm.org/viewvc/llvm-project?rev=319895&view=rev
Log:
[X86][AVX512] Regenerate vpmovm2*/vpmov*2m avx512 schedule tests

Modified:
    llvm/trunk/test/CodeGen/X86/avx512-schedule.ll

Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=319895&r1=319894&r2=319895&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Wed Dec  6 06:07:38 2017
@@ -2361,7 +2361,7 @@ define <16 x float> @sbto16f32(<16 x i32
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpmovm2d %k0, %zmm0
+; GENERIC-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2369,7 +2369,7 @@ define <16 x float> @sbto16f32(<16 x i32
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
 ; SKX-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2d %k0, %zmm0
+; SKX-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.25]
 ; SKX-NEXT:    vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %mask = icmp slt <16 x i32> %a, zeroinitializer
@@ -2471,9 +2471,9 @@ define <16 x double> @sbto16f64(<16 x do
 ; GENERIC-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00]
 ; GENERIC-NEXT:    vcmpltpd %zmm1, %zmm2, %k0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vcmpltpd %zmm0, %zmm2, %k1 # sched: [3:1.00]
-; GENERIC-NEXT:    vpmovm2d %k1, %ymm0
+; GENERIC-NEXT:    vpmovm2d %k1, %ymm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT:    vpmovm2d %k0, %ymm1
+; GENERIC-NEXT:    vpmovm2d %k0, %ymm1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2482,9 +2482,9 @@ define <16 x double> @sbto16f64(<16 x do
 ; SKX-NEXT:    vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
 ; SKX-NEXT:    vcmpltpd %zmm1, %zmm2, %k0 # sched: [3:1.00]
 ; SKX-NEXT:    vcmpltpd %zmm0, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2d %k1, %ymm0
+; SKX-NEXT:    vpmovm2d %k1, %ymm0 # sched: [1:0.25]
 ; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
-; SKX-NEXT:    vpmovm2d %k0, %ymm1
+; SKX-NEXT:    vpmovm2d %k0, %ymm1 # sched: [1:0.25]
 ; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %cmpres = fcmp ogt <16 x double> %a, zeroinitializer
@@ -2497,7 +2497,7 @@ define <8 x double> @sbto8f64(<8 x doubl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
 ; GENERIC-NEXT:    vcmpltpd %zmm0, %zmm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpmovm2d %k0, %ymm0
+; GENERIC-NEXT:    vpmovm2d %k0, %ymm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2505,7 +2505,7 @@ define <8 x double> @sbto8f64(<8 x doubl
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
 ; SKX-NEXT:    vcmpltpd %zmm0, %zmm1, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2d %k0, %ymm0
+; SKX-NEXT:    vpmovm2d %k0, %ymm0 # sched: [1:0.25]
 ; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %cmpres = fcmp ogt <8 x double> %a, zeroinitializer
@@ -2518,7 +2518,7 @@ define <8 x float> @sbto8f32(<8 x float>
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
 ; GENERIC-NEXT:    vcmpltps %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpmovm2d %k0, %ymm0
+; GENERIC-NEXT:    vpmovm2d %k0, %ymm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2526,7 +2526,7 @@ define <8 x float> @sbto8f32(<8 x float>
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
 ; SKX-NEXT:    vcmpltps %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2d %k0, %ymm0
+; SKX-NEXT:    vpmovm2d %k0, %ymm0 # sched: [1:0.25]
 ; SKX-NEXT:    vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %cmpres = fcmp ogt <8 x float> %a, zeroinitializer
@@ -2539,7 +2539,7 @@ define <4 x float> @sbto4f32(<4 x float>
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
 ; GENERIC-NEXT:    vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpmovm2d %k0, %xmm0
+; GENERIC-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2547,7 +2547,7 @@ define <4 x float> @sbto4f32(<4 x float>
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
 ; SKX-NEXT:    vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2d %k0, %xmm0
+; SKX-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.25]
 ; SKX-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %cmpres = fcmp ogt <4 x float> %a, zeroinitializer
@@ -2560,7 +2560,7 @@ define <4 x double> @sbto4f64(<4 x doubl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
 ; GENERIC-NEXT:    vcmpltpd %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpmovm2d %k0, %xmm0
+; GENERIC-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2568,7 +2568,7 @@ define <4 x double> @sbto4f64(<4 x doubl
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
 ; SKX-NEXT:    vcmpltpd %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2d %k0, %xmm0
+; SKX-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.25]
 ; SKX-NEXT:    vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %cmpres = fcmp ogt <4 x double> %a, zeroinitializer
@@ -2581,7 +2581,7 @@ define <2 x float> @sbto2f32(<2 x float>
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
 ; GENERIC-NEXT:    vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpmovm2d %k0, %xmm0
+; GENERIC-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2589,7 +2589,7 @@ define <2 x float> @sbto2f32(<2 x float>
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
 ; SKX-NEXT:    vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2d %k0, %xmm0
+; SKX-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.25]
 ; SKX-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %cmpres = fcmp ogt <2 x float> %a, zeroinitializer
@@ -2602,7 +2602,7 @@ define <2 x double> @sbto2f64(<2 x doubl
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
 ; GENERIC-NEXT:    vcmpltpd %xmm0, %xmm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpmovm2q %k0, %xmm0
+; GENERIC-NEXT:    vpmovm2q %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vcvtqq2pd %xmm0, %xmm0 # sched: [4:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2610,7 +2610,7 @@ define <2 x double> @sbto2f64(<2 x doubl
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
 ; SKX-NEXT:    vcmpltpd %xmm0, %xmm1, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2q %k0, %xmm0
+; SKX-NEXT:    vpmovm2q %k0, %xmm0 # sched: [1:0.25]
 ; SKX-NEXT:    vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %cmpres = fcmp ogt <2 x double> %a, zeroinitializer
@@ -3010,7 +3010,7 @@ define <8 x i16> @zext_8x8mem_to_8x16(<8
 ; GENERIC-LABEL: zext_8x8mem_to_8x16:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovw2m %xmm0, %k1
+; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3030,7 +3030,7 @@ define <8 x i16> @sext_8x8mem_to_8x16(<8
 ; GENERIC-LABEL: sext_8x8mem_to_8x16:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovw2m %xmm0, %k1
+; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3051,7 +3051,7 @@ define <16 x i16> @zext_16x8mem_to_16x16
 ; GENERIC-LABEL: zext_16x8mem_to_16x16:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %xmm0, %k1
+; GENERIC-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3071,7 +3071,7 @@ define <16 x i16> @sext_16x8mem_to_16x16
 ; GENERIC-LABEL: sext_16x8mem_to_16x16:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %xmm0, %k1
+; GENERIC-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3105,7 +3105,7 @@ define <16 x i16> @zext_16x8_to_16x16_ma
 ; GENERIC-LABEL: zext_16x8_to_16x16_mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %xmm1, %k1
+; GENERIC-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3138,7 +3138,7 @@ define <16 x i16> @sext_16x8_to_16x16_ma
 ; GENERIC-LABEL: sext_16x8_to_16x16_mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %xmm1, %k1
+; GENERIC-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovsxbw %xmm0, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3157,7 +3157,7 @@ define <32 x i16> @zext_32x8mem_to_32x16
 ; GENERIC-LABEL: zext_32x8mem_to_32x16:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %ymm0, %k1
+; GENERIC-NEXT:    vpmovb2m %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3177,7 +3177,7 @@ define <32 x i16> @sext_32x8mem_to_32x16
 ; GENERIC-LABEL: sext_32x8mem_to_32x16:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %ymm0, %k1
+; GENERIC-NEXT:    vpmovb2m %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3211,7 +3211,7 @@ define <32 x i16> @zext_32x8_to_32x16_ma
 ; GENERIC-LABEL: zext_32x8_to_32x16_mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %ymm1, %k1
+; GENERIC-NEXT:    vpmovb2m %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3244,7 +3244,7 @@ define <32 x i16> @sext_32x8_to_32x16_ma
 ; GENERIC-LABEL: sext_32x8_to_32x16_mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %ymm1, %k1
+; GENERIC-NEXT:    vpmovb2m %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovsxbw %ymm0, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3303,7 +3303,7 @@ define <8 x i32> @zext_8x8mem_to_8x32(<8
 ; GENERIC-LABEL: zext_8x8mem_to_8x32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovw2m %xmm0, %k1
+; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3323,7 +3323,7 @@ define <8 x i32> @sext_8x8mem_to_8x32(<8
 ; GENERIC-LABEL: sext_8x8mem_to_8x32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovw2m %xmm0, %k1
+; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3343,7 +3343,7 @@ define <16 x i32> @zext_16x8mem_to_16x32
 ; GENERIC-LABEL: zext_16x8mem_to_16x32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %xmm0, %k1
+; GENERIC-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3363,7 +3363,7 @@ define <16 x i32> @sext_16x8mem_to_16x32
 ; GENERIC-LABEL: sext_16x8mem_to_16x32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %xmm0, %k1
+; GENERIC-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3383,7 +3383,7 @@ define <16 x i32> @zext_16x8_to_16x32_ma
 ; GENERIC-LABEL: zext_16x8_to_16x32_mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %xmm1, %k1
+; GENERIC-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3402,7 +3402,7 @@ define <16 x i32> @sext_16x8_to_16x32_ma
 ; GENERIC-LABEL: sext_16x8_to_16x32_mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %xmm1, %k1
+; GENERIC-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovsxbd %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3557,7 +3557,7 @@ define <8 x i64> @zext_8x8mem_to_8x64(<8
 ; GENERIC-LABEL: zext_8x8mem_to_8x64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovw2m %xmm0, %k1
+; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3577,7 +3577,7 @@ define <8 x i64> @sext_8x8mem_to_8x64mas
 ; GENERIC-LABEL: sext_8x8mem_to_8x64mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovw2m %xmm0, %k1
+; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3668,7 +3668,7 @@ define <8 x i32> @zext_8x16mem_to_8x32(<
 ; GENERIC-LABEL: zext_8x16mem_to_8x32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovw2m %xmm0, %k1
+; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3688,7 +3688,7 @@ define <8 x i32> @sext_8x16mem_to_8x32ma
 ; GENERIC-LABEL: sext_8x16mem_to_8x32mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovw2m %xmm0, %k1
+; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3723,7 +3723,7 @@ define <8 x i32> @zext_8x16_to_8x32mask(
 ; GENERIC-LABEL: zext_8x16_to_8x32mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovw2m %xmm1, %k1
+; GENERIC-NEXT:    vpmovw2m %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3756,7 +3756,7 @@ define <16 x i32> @zext_16x16mem_to_16x3
 ; GENERIC-LABEL: zext_16x16mem_to_16x32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %xmm0, %k1
+; GENERIC-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3776,7 +3776,7 @@ define <16 x i32> @sext_16x16mem_to_16x3
 ; GENERIC-LABEL: sext_16x16mem_to_16x32mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %xmm0, %k1
+; GENERIC-NEXT:    vpmovb2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3810,7 +3810,7 @@ define <16 x i32> @zext_16x16_to_16x32ma
 ; GENERIC-LABEL: zext_16x16_to_16x32mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %xmm1, %k1
+; GENERIC-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3953,7 +3953,7 @@ define <8 x i64> @zext_8x16mem_to_8x64(<
 ; GENERIC-LABEL: zext_8x16mem_to_8x64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovw2m %xmm0, %k1
+; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -3973,7 +3973,7 @@ define <8 x i64> @sext_8x16mem_to_8x64ma
 ; GENERIC-LABEL: sext_8x16mem_to_8x64mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovw2m %xmm0, %k1
+; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4008,7 +4008,7 @@ define <8 x i64> @zext_8x16_to_8x64mask(
 ; GENERIC-LABEL: zext_8x16_to_8x64mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovw2m %xmm1, %k1
+; GENERIC-NEXT:    vpmovw2m %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4184,7 +4184,7 @@ define <8 x i64> @zext_8x32mem_to_8x64(<
 ; GENERIC-LABEL: zext_8x32mem_to_8x64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovw2m %xmm0, %k1
+; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4204,7 +4204,7 @@ define <8 x i64> @sext_8x32mem_to_8x64ma
 ; GENERIC-LABEL: sext_8x32mem_to_8x64mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovw2m %xmm0, %k1
+; GENERIC-NEXT:    vpmovw2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4253,7 +4253,7 @@ define <8 x i64> @zext_8x32_to_8x64mask(
 ; GENERIC-LABEL: zext_8x32_to_8x64mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovw2m %xmm1, %k1
+; GENERIC-NEXT:    vpmovw2m %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -4333,7 +4333,7 @@ define i16 @trunc_16i8_to_16i1(<16 x i8>
 ; GENERIC-LABEL: trunc_16i8_to_16i1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %xmm0, %k0
+; GENERIC-NEXT:    vpmovb2m %xmm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %k0, %eax
 ; GENERIC-NEXT:    # kill: %ax<def> %ax<kill> %eax<kill>
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4380,7 +4380,7 @@ define <4 x i32> @trunc_4i32_to_4i1(<4 x
 ; GENERIC-NEXT:    vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
 ; GENERIC-NEXT:    vpslld $31, %xmm1, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vptestmd %xmm0, %xmm0, %k0 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovm2d %k0, %xmm0
+; GENERIC-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: trunc_4i32_to_4i1:
@@ -4389,7 +4389,7 @@ define <4 x i32> @trunc_4i32_to_4i1(<4 x
 ; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
 ; SKX-NEXT:    vpslld $31, %xmm1, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k0 {%k1} # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2d %k0, %xmm0
+; SKX-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %mask_a = trunc <4 x i32>%a to <4 x i1>
   %mask_b = trunc <4 x i32>%b to <4 x i1>
@@ -4403,7 +4403,7 @@ define i8 @trunc_8i16_to_8i1(<8 x i16> %
 ; GENERIC-LABEL: trunc_8i16_to_8i1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovw2m %xmm0, %k0
+; GENERIC-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %k0, %eax
 ; GENERIC-NEXT:    # kill: %al<def> %al<kill> %eax<kill>
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -4424,13 +4424,13 @@ define <8 x i32> @sext_8i1_8i32(<8 x i32
 ; GENERIC-LABEL: sext_8i1_8i32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpcmpled %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpmovm2d %k0, %ymm0
+; GENERIC-NEXT:    vpmovm2d %k0, %ymm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_8i1_8i32:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpcmpled %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2d %k0, %ymm0
+; SKX-NEXT:    vpmovm2d %k0, %ymm0 # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %x = icmp slt <8 x i32> %a1, %a2
   %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
@@ -4475,14 +4475,14 @@ define <8 x i16> @sext_8i1_8i16(<8 x i32
 ; GENERIC-LABEL: sext_8i1_8i16:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpmovm2w %k0, %xmm0
+; GENERIC-NEXT:    vpmovm2w %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vzeroupper
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_8i1_8i16:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2w %k0, %xmm0
+; SKX-NEXT:    vpmovm2w %k0, %xmm0 # sched: [1:0.25]
 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %x = icmp slt <8 x i32> %a1, %a2
@@ -4494,13 +4494,13 @@ define <16 x i32> @sext_16i1_16i32(<16 x
 ; GENERIC-LABEL: sext_16i1_16i32:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpmovm2d %k0, %zmm0
+; GENERIC-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_16i1_16i32:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2d %k0, %zmm0
+; SKX-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %x = icmp slt <16 x i32> %a1, %a2
   %y = sext <16 x i1> %x to <16 x i32>
@@ -4511,13 +4511,13 @@ define <8 x i64> @sext_8i1_8i64(<8 x i32
 ; GENERIC-LABEL: sext_8i1_8i64:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpmovm2q %k0, %zmm0
+; GENERIC-NEXT:    vpmovm2q %k0, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sext_8i1_8i64:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2q %k0, %zmm0
+; SKX-NEXT:    vpmovm2q %k0, %zmm0 # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %x = icmp slt <8 x i32> %a1, %a2
   %y = sext <8 x i1> %x to <8 x i64>
@@ -4548,7 +4548,7 @@ define <64 x i16> @test21(<64 x i16> %x
 ; GENERIC-LABEL: test21:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %zmm2, %zmm2 # sched: [3:1.00]
-; GENERIC-NEXT:    vpmovb2m %zmm2, %k1
+; GENERIC-NEXT:    vpmovb2m %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
 ; GENERIC-NEXT:    kshiftrq $32, %k1, %k1
 ; GENERIC-NEXT:    vmovdqu16 %zmm1, %zmm1 {%k1} {z}
@@ -4585,7 +4585,7 @@ define <16 x i16> @shuffle_zext_16x8_to_
 ; GENERIC-LABEL: shuffle_zext_16x8_to_16x16_mask:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %xmm1, %k1
+; GENERIC-NEXT:    vpmovb2m %xmm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -5006,7 +5006,7 @@ define <16 x float> @test_x86_fmadd132_p
 ; GENERIC-LABEL: test_x86_fmadd132_ps:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %xmm2, %k1
+; GENERIC-NEXT:    vpmovb2m %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vmulps (%rdi), %zmm0, %zmm2 # sched: [9:1.00]
 ; GENERIC-NEXT:    vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -5030,7 +5030,7 @@ define <16 x float> @test_x86_fmadd231_p
 ; GENERIC-LABEL: test_x86_fmadd231_ps:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %xmm2, %k1
+; GENERIC-NEXT:    vpmovb2m %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vmulps (%rdi), %zmm0, %zmm0 # sched: [9:1.00]
 ; GENERIC-NEXT:    vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [3:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0
@@ -5056,7 +5056,7 @@ define <16 x float> @test_x86_fmadd213_p
 ; GENERIC-LABEL: test_x86_fmadd213_ps:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %xmm2, %k1
+; GENERIC-NEXT:    vpmovb2m %xmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vmulps %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
 ; GENERIC-NEXT:    vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [7:1.00]
 ; GENERIC-NEXT:    vmovaps %zmm1, %zmm0
@@ -7058,7 +7058,7 @@ define <4 x i32> @test4(<4 x i64> %x, <4
 ; GENERIC-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpcmpgtq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    kandnw %k0, %k1, %k0
-; GENERIC-NEXT:    vpmovm2d %k0, %xmm0
+; GENERIC-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vzeroupper
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7067,7 +7067,7 @@ define <4 x i32> @test4(<4 x i64> %x, <4
 ; SKX-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 # sched: [3:1.00]
 ; SKX-NEXT:    vpcmpgtq %ymm3, %ymm2, %k1 # sched: [3:1.00]
 ; SKX-NEXT:    kandnw %k0, %k1, %k0 # sched: [1:1.00]
-; SKX-NEXT:    vpmovm2d %k0, %xmm0
+; SKX-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.25]
 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %x_gt_y = icmp sgt <4 x i64> %x, %y
@@ -7083,7 +7083,7 @@ define <2 x i64> @vcmp_test5(<2 x i64> %
 ; GENERIC-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0 # sched: [3:1.00]
 ; GENERIC-NEXT:    vpcmpgtq %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; GENERIC-NEXT:    kandnw %k1, %k0, %k0
-; GENERIC-NEXT:    vpmovm2q %k0, %xmm0
+; GENERIC-NEXT:    vpmovm2q %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vcmp_test5:
@@ -7091,7 +7091,7 @@ define <2 x i64> @vcmp_test5(<2 x i64> %
 ; SKX-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0 # sched: [3:1.00]
 ; SKX-NEXT:    vpcmpgtq %xmm3, %xmm2, %k1 # sched: [3:1.00]
 ; SKX-NEXT:    kandnw %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT:    vpmovm2q %k0, %xmm0
+; SKX-NEXT:    vpmovm2q %k0, %xmm0 # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %x_gt_y = icmp slt <2 x i64> %x, %y
   %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
@@ -7115,7 +7115,7 @@ define void @vcmp_test7(<8 x i1> %mask)
 ; GENERIC-LABEL: vcmp_test7:
 ; GENERIC:       # %bb.0: # %allocas
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovw2m %xmm0, %k0
+; GENERIC-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    movb $85, %al # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %eax, %k1
 ; GENERIC-NEXT:    korb %k1, %k0, %k0
@@ -7151,12 +7151,12 @@ define <16 x i8> @vcmp_test8(<16 x i32>%
 ; GENERIC-NEXT:    jg .LBB386_1 # sched: [1:1.00]
 ; GENERIC-NEXT:  # %bb.2:
 ; GENERIC-NEXT:    vpcmpltud %zmm2, %zmm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpmovm2b %k0, %xmm0
+; GENERIC-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vzeroupper
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ; GENERIC-NEXT:  .LBB386_1:
 ; GENERIC-NEXT:    vpcmpgtd %zmm2, %zmm0, %k0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpmovm2b %k0, %xmm0
+; GENERIC-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vzeroupper
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7167,12 +7167,12 @@ define <16 x i8> @vcmp_test8(<16 x i32>%
 ; SKX-NEXT:    jg .LBB386_1 # sched: [1:0.50]
 ; SKX-NEXT:  # %bb.2:
 ; SKX-NEXT:    vpcmpltud %zmm2, %zmm1, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2b %k0, %xmm0
+; SKX-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.25]
 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ; SKX-NEXT:  .LBB386_1:
 ; SKX-NEXT:    vpcmpgtd %zmm2, %zmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2b %k0, %xmm0
+; SKX-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.25]
 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %cond = icmp sgt i32 %a1, %b1
@@ -7193,8 +7193,8 @@ define <16 x i1> @vpmov_test9(<16 x i1>%
 ; GENERIC-NEXT:  .LBB387_1:
 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:  .LBB387_3:
-; GENERIC-NEXT:    vpmovb2m %xmm0, %k0
-; GENERIC-NEXT:    vpmovm2b %k0, %xmm0
+; GENERIC-NEXT:    vpmovb2m %xmm0, %k0 # sched: [1:0.33]
+; GENERIC-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vpmov_test9:
@@ -7208,7 +7208,7 @@ define <16 x i1> @vpmov_test9(<16 x i1>%
 ; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:  .LBB387_3:
 ; SKX-NEXT:    vpmovb2m %xmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT:    vpmovm2b %k0, %xmm0
+; SKX-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %mask = icmp sgt i32 %a1, %b1
   %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b
@@ -7231,7 +7231,7 @@ define <4 x i1> @vmov_test11(<4 x i1>%a,
 ; GENERIC-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:  .LBB389_3:
 ; GENERIC-NEXT:    vptestmd %xmm0, %xmm0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovm2d %k0, %xmm0
+; GENERIC-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vmov_test11:
@@ -7245,7 +7245,7 @@ define <4 x i1> @vmov_test11(<4 x i1>%a,
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
 ; SKX-NEXT:  .LBB389_3:
 ; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2d %k0, %xmm0
+; SKX-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %mask = icmp sgt i32 %a1, %b1
   %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b
@@ -7298,7 +7298,7 @@ define <16 x i1> @vmov_test15(i32 %x, i3
 ; GENERIC-NEXT:    movw $1, %cx # sched: [1:0.33]
 ; GENERIC-NEXT:    cmovgw %ax, %cx # sched: [2:0.67]
 ; GENERIC-NEXT:    kmovd %ecx, %k0
-; GENERIC-NEXT:    vpmovm2b %k0, %xmm0
+; GENERIC-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vmov_test15:
@@ -7309,7 +7309,7 @@ define <16 x i1> @vmov_test15(i32 %x, i3
 ; SKX-NEXT:    movw $1, %cx # sched: [1:0.25]
 ; SKX-NEXT:    cmovgw %ax, %cx # sched: [1:0.50]
 ; SKX-NEXT:    kmovd %ecx, %k0 # sched: [1:1.00]
-; SKX-NEXT:    vpmovm2b %k0, %xmm0
+; SKX-NEXT:    vpmovm2b %k0, %xmm0 # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %a = bitcast i16 21845 to <16 x i1>
   %b = bitcast i16 1 to <16 x i1>
@@ -7325,15 +7325,15 @@ define <64 x i8> @vmov_test16(i64 %x) {
 ; GENERIC-NEXT:    kmovq %rdi, %k0
 ; GENERIC-NEXT:    movb $1, %al # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %eax, %k1
-; GENERIC-NEXT:    vpmovm2b %k1, %zmm0
+; GENERIC-NEXT:    vpmovm2b %k1, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpsllq $40, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovm2b %k0, %zmm1
+; GENERIC-NEXT:    vpmovm2b %k0, %zmm1 # sched: [1:0.33]
 ; GENERIC-NEXT:    movl $32, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %eax, %k1
 ; GENERIC-NEXT:    vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [2:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %zmm0, %k0
-; GENERIC-NEXT:    vpmovm2b %k0, %zmm0
+; GENERIC-NEXT:    vpmovb2m %zmm0, %k0 # sched: [1:0.33]
+; GENERIC-NEXT:    vpmovm2b %k0, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vmov_test16:
@@ -7341,15 +7341,15 @@ define <64 x i8> @vmov_test16(i64 %x) {
 ; SKX-NEXT:    kmovq %rdi, %k0 # sched: [1:1.00]
 ; SKX-NEXT:    movb $1, %al # sched: [1:0.25]
 ; SKX-NEXT:    kmovd %eax, %k1 # sched: [1:1.00]
-; SKX-NEXT:    vpmovm2b %k1, %zmm0
+; SKX-NEXT:    vpmovm2b %k1, %zmm0 # sched: [1:0.25]
 ; SKX-NEXT:    vpsllq $40, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vpmovm2b %k0, %zmm1
+; SKX-NEXT:    vpmovm2b %k0, %zmm1 # sched: [1:0.25]
 ; SKX-NEXT:    movl $32, %eax # sched: [1:0.25]
 ; SKX-NEXT:    kmovd %eax, %k1 # sched: [1:1.00]
 ; SKX-NEXT:    vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33]
 ; SKX-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [3:1.00]
 ; SKX-NEXT:    vpmovb2m %zmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT:    vpmovm2b %k0, %zmm0
+; SKX-NEXT:    vpmovm2b %k0, %zmm0 # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %a = bitcast i64 %x to <64 x i1>
   %b = insertelement <64 x i1>%a, i1 true, i32 5
@@ -7365,15 +7365,15 @@ define <64 x i8> @vmov_test17(i64 %x, i3
 ; GENERIC-NEXT:    cmpl %edx, %esi # sched: [1:0.33]
 ; GENERIC-NEXT:    setg %al # sched: [1:0.50]
 ; GENERIC-NEXT:    kmovd %eax, %k1
-; GENERIC-NEXT:    vpmovm2b %k1, %zmm0
+; GENERIC-NEXT:    vpmovm2b %k1, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpsllq $40, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovm2b %k0, %zmm1
+; GENERIC-NEXT:    vpmovm2b %k0, %zmm1 # sched: [1:0.33]
 ; GENERIC-NEXT:    movl $32, %eax # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %eax, %k1
 ; GENERIC-NEXT:    vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [2:1.00]
 ; GENERIC-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %zmm0, %k0
-; GENERIC-NEXT:    vpmovm2b %k0, %zmm0
+; GENERIC-NEXT:    vpmovb2m %zmm0, %k0 # sched: [1:0.33]
+; GENERIC-NEXT:    vpmovm2b %k0, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vmov_test17:
@@ -7382,15 +7382,15 @@ define <64 x i8> @vmov_test17(i64 %x, i3
 ; SKX-NEXT:    cmpl %edx, %esi # sched: [1:0.25]
 ; SKX-NEXT:    setg %al # sched: [1:0.50]
 ; SKX-NEXT:    kmovd %eax, %k1 # sched: [1:1.00]
-; SKX-NEXT:    vpmovm2b %k1, %zmm0
+; SKX-NEXT:    vpmovm2b %k1, %zmm0 # sched: [1:0.25]
 ; SKX-NEXT:    vpsllq $40, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT:    vpmovm2b %k0, %zmm1
+; SKX-NEXT:    vpmovm2b %k0, %zmm1 # sched: [1:0.25]
 ; SKX-NEXT:    movl $32, %eax # sched: [1:0.25]
 ; SKX-NEXT:    kmovd %eax, %k1 # sched: [1:1.00]
 ; SKX-NEXT:    vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33]
 ; SKX-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [3:1.00]
 ; SKX-NEXT:    vpmovb2m %zmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT:    vpmovm2b %k0, %zmm0
+; SKX-NEXT:    vpmovm2b %k0, %zmm0 # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %a = bitcast i64 %x to <64 x i1>
   %b = icmp sgt i32 %y, %z
@@ -7408,16 +7408,16 @@ define <8 x i1> @vmov_test18(i8 %a, i16
 ; GENERIC-NEXT:    kshiftrw $15, %k0, %k0
 ; GENERIC-NEXT:    kshiftlw $6, %k2, %k2
 ; GENERIC-NEXT:    kshiftrw $15, %k2, %k2
-; GENERIC-NEXT:    vpmovm2q %k1, %zmm0
-; GENERIC-NEXT:    vpmovm2q %k2, %zmm1
+; GENERIC-NEXT:    vpmovm2q %k1, %zmm0 # sched: [1:0.33]
+; GENERIC-NEXT:    vpmovm2q %k2, %zmm1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7] sched: [4:0.50]
 ; GENERIC-NEXT:    vpermi2q %zmm1, %zmm0, %zmm2 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovq2m %zmm2, %k1
+; GENERIC-NEXT:    vpmovq2m %zmm2, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    kshiftlb $1, %k1, %k1
 ; GENERIC-NEXT:    kshiftrb $1, %k1, %k1
 ; GENERIC-NEXT:    kshiftlb $7, %k0, %k0
 ; GENERIC-NEXT:    korb %k0, %k1, %k0
-; GENERIC-NEXT:    vpmovm2w %k0, %xmm0
+; GENERIC-NEXT:    vpmovm2w %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    vzeroupper
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7429,8 +7429,8 @@ define <8 x i1> @vmov_test18(i8 %a, i16
 ; SKX-NEXT:    kshiftrw $15, %k0, %k0 # sched: [3:1.00]
 ; SKX-NEXT:    kshiftlw $6, %k2, %k2 # sched: [3:1.00]
 ; SKX-NEXT:    kshiftrw $15, %k2, %k2 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2q %k1, %zmm0
-; SKX-NEXT:    vpmovm2q %k2, %zmm1
+; SKX-NEXT:    vpmovm2q %k1, %zmm0 # sched: [1:0.25]
+; SKX-NEXT:    vpmovm2q %k2, %zmm1 # sched: [1:0.25]
 ; SKX-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7] sched: [8:0.50]
 ; SKX-NEXT:    vpermi2q %zmm1, %zmm0, %zmm2 # sched: [3:1.00]
 ; SKX-NEXT:    vpmovq2m %zmm2, %k1 # sched: [1:1.00]
@@ -7438,7 +7438,7 @@ define <8 x i1> @vmov_test18(i8 %a, i16
 ; SKX-NEXT:    kshiftrb $1, %k1, %k1 # sched: [3:1.00]
 ; SKX-NEXT:    kshiftlb $7, %k0, %k0 # sched: [3:1.00]
 ; SKX-NEXT:    korb %k0, %k1, %k0 # sched: [1:1.00]
-; SKX-NEXT:    vpmovm2w %k0, %xmm0
+; SKX-NEXT:    vpmovm2w %k0, %xmm0 # sched: [1:0.25]
 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %b = bitcast i8 %a to <8 x i1>
@@ -7453,7 +7453,7 @@ define <32 x i16> @vmov_test21(<32 x i16
 ; GENERIC-LABEL: vmov_test21:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %ymm1, %k1
+; GENERIC-NEXT:    vpmovb2m %ymm1, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7570,7 +7570,7 @@ define void @store_v8i1(<8 x i1> %c , <8
 ; GENERIC-LABEL: store_v8i1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovw2m %xmm0, %k0
+; GENERIC-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    knotb %k0, %k0
 ; GENERIC-NEXT:    kmovb %k0, (%rdi)
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7591,7 +7591,7 @@ define void @store_v16i1(<16 x i1> %c ,
 ; GENERIC-LABEL: store_v16i1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %xmm0, %k0
+; GENERIC-NEXT:    vpmovb2m %xmm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    knotw %k0, %k0
 ; GENERIC-NEXT:    kmovw %k0, (%rdi)
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -7859,13 +7859,13 @@ define <8 x i64> @load_8i1(<8 x i1>* %a)
 ; GENERIC-LABEL: load_8i1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    kmovb (%rdi), %k0
-; GENERIC-NEXT:    vpmovm2q %k0, %zmm0
+; GENERIC-NEXT:    vpmovm2q %k0, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: load_8i1:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    kmovb (%rdi), %k0 # sched: [7:1.00]
-; SKX-NEXT:    vpmovm2q %k0, %zmm0
+; SKX-NEXT:    vpmovm2q %k0, %zmm0 # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %b = load <8 x i1>, <8 x i1>* %a
   %c = sext <8 x i1> %b to <8 x i64>
@@ -7876,13 +7876,13 @@ define <16 x i32> @load_16i1(<16 x i1>*
 ; GENERIC-LABEL: load_16i1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    kmovw (%rdi), %k0
-; GENERIC-NEXT:    vpmovm2d %k0, %zmm0
+; GENERIC-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: load_16i1:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    kmovw (%rdi), %k0 # sched: [7:1.00]
-; SKX-NEXT:    vpmovm2d %k0, %zmm0
+; SKX-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %b = load <16 x i1>, <16 x i1>* %a
   %c = sext <16 x i1> %b to <16 x i32>
@@ -7893,13 +7893,13 @@ define <2 x i16> @load_2i1(<2 x i1>* %a)
 ; GENERIC-LABEL: load_2i1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    kmovb (%rdi), %k0
-; GENERIC-NEXT:    vpmovm2q %k0, %xmm0
+; GENERIC-NEXT:    vpmovm2q %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: load_2i1:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    kmovb (%rdi), %k0 # sched: [7:1.00]
-; SKX-NEXT:    vpmovm2q %k0, %xmm0
+; SKX-NEXT:    vpmovm2q %k0, %xmm0 # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %b = load <2 x i1>, <2 x i1>* %a
   %c = sext <2 x i1> %b to <2 x i16>
@@ -7910,13 +7910,13 @@ define <4 x i16> @load_4i1(<4 x i1>* %a)
 ; GENERIC-LABEL: load_4i1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    kmovb (%rdi), %k0
-; GENERIC-NEXT:    vpmovm2d %k0, %xmm0
+; GENERIC-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: load_4i1:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    kmovb (%rdi), %k0 # sched: [7:1.00]
-; SKX-NEXT:    vpmovm2d %k0, %xmm0
+; SKX-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %b = load <4 x i1>, <4 x i1>* %a
   %c = sext <4 x i1> %b to <4 x i16>
@@ -7927,13 +7927,13 @@ define <32 x i16> @load_32i1(<32 x i1>*
 ; GENERIC-LABEL: load_32i1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    kmovd (%rdi), %k0
-; GENERIC-NEXT:    vpmovm2w %k0, %zmm0
+; GENERIC-NEXT:    vpmovm2w %k0, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: load_32i1:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    kmovd (%rdi), %k0 # sched: [7:1.00]
-; SKX-NEXT:    vpmovm2w %k0, %zmm0
+; SKX-NEXT:    vpmovm2w %k0, %zmm0 # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %b = load <32 x i1>, <32 x i1>* %a
   %c = sext <32 x i1> %b to <32 x i16>
@@ -7944,13 +7944,13 @@ define <64 x i8> @load_64i1(<64 x i1>* %
 ; GENERIC-LABEL: load_64i1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    kmovq (%rdi), %k0
-; GENERIC-NEXT:    vpmovm2b %k0, %zmm0
+; GENERIC-NEXT:    vpmovm2b %k0, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: load_64i1:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    kmovq (%rdi), %k0 # sched: [7:1.00]
-; SKX-NEXT:    vpmovm2b %k0, %zmm0
+; SKX-NEXT:    vpmovm2b %k0, %zmm0 # sched: [1:0.25]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %b = load <64 x i1>, <64 x i1>* %a
   %c = sext <64 x i1> %b to <64 x i8>
@@ -7961,7 +7961,7 @@ define void @store_8i1(<8 x i1>* %a, <8
 ; GENERIC-LABEL: store_8i1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovw2m %xmm0, %k0
+; GENERIC-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovb %k0, (%rdi)
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7979,7 +7979,7 @@ define void @store_8i1_1(<8 x i1>* %a, <
 ; GENERIC-LABEL: store_8i1_1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovw2m %xmm0, %k0
+; GENERIC-NEXT:    vpmovw2m %xmm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovb %k0, (%rdi)
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -7998,7 +7998,7 @@ define void @store_16i1(<16 x i1>* %a, <
 ; GENERIC-LABEL: store_16i1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %xmm0, %k0
+; GENERIC-NEXT:    vpmovb2m %xmm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovw %k0, (%rdi)
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -8016,7 +8016,7 @@ define void @store_32i1(<32 x i1>* %a, <
 ; GENERIC-LABEL: store_32i1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT:    vpmovb2m %ymm0, %k0
+; GENERIC-NEXT:    vpmovb2m %ymm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %k0, (%rdi)
 ; GENERIC-NEXT:    vzeroupper
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8036,7 +8036,7 @@ define void @store_32i1_1(<32 x i1>* %a,
 ; GENERIC-LABEL: store_32i1_1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $15, %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpmovw2m %zmm0, %k0
+; GENERIC-NEXT:    vpmovw2m %zmm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovd %k0, (%rdi)
 ; GENERIC-NEXT:    vzeroupper
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8059,7 +8059,7 @@ define void @store_64i1(<64 x i1>* %a, <
 ; GENERIC-LABEL: store_64i1:
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vpsllw $7, %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpmovb2m %zmm0, %k0
+; GENERIC-NEXT:    vpmovb2m %zmm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT:    kmovq %k0, (%rdi)
 ; GENERIC-NEXT:    vzeroupper
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8591,7 +8591,7 @@ define <16 x i32> @test_vbroadcast() {
 ; GENERIC:       # %bb.0: # %entry
 ; GENERIC-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
 ; GENERIC-NEXT:    vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpmovm2d %k0, %zmm0
+; GENERIC-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT:    knotw %k0, %k1
 ; GENERIC-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
@@ -8600,7 +8600,7 @@ define <16 x i32> @test_vbroadcast() {
 ; SKX:       # %bb.0: # %entry
 ; SKX-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2d %k0, %zmm0
+; SKX-NEXT:    vpmovm2d %k0, %zmm0 # sched: [1:0.25]
 ; SKX-NEXT:    knotw %k0, %k1 # sched: [1:1.00]
 ; SKX-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; SKX-NEXT:    retq # sched: [7:1.00]




More information about the llvm-commits mailing list