[llvm] r350800 - [X86] Disable DomainReassignment pass when AVX512BW is disabled to avoid injecting VK32/VK64 references into the MachineIR

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 9 23:43:54 PST 2019


Author: ctopper
Date: Wed Jan  9 23:43:54 2019
New Revision: 350800

URL: http://llvm.org/viewvc/llvm-project?rev=350800&view=rev
Log:
[X86] Disable DomainReassignment pass when AVX512BW is disabled to avoid injecting VK32/VK64 references into the MachineIR

Summary:
This pass replaces GR8/GR16/GR32/GR64 with their equivalently sized mask register classes. But VK32/VK64 aren't legal without AVX512BW. In practice this mostly works if the register coalescer is able to remove the VK32/VK64 register class reference, or if we never spill the value. But there's no guarantee of that.
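
For illustration, here is a purely hypothetical reduced shape of the problem (the actual ISPC reproducer was lost, see below): scalar i32 logic on compare results. On a -mcpu=knl target the pass could move the i32 'and' into the mask domain as a VK32 operation, even though VK32 is not a legal register class there:

  define i16 @mask_and_i32(<16 x i32> %a, <16 x i32> %b,
                           <16 x i32> %c, <16 x i32> %d) {
    %cmp1 = icmp eq <16 x i32> %a, %b   ; 16-bit mask in a k-register
    %cmp2 = icmp sgt <16 x i32> %c, %d
    %m1 = bitcast <16 x i1> %cmp1 to i16
    %m2 = bitcast <16 x i1> %cmp2 to i16
    %z1 = zext i16 %m1 to i32           ; masks round-trip through GR32
    %z2 = zext i16 %m2 to i32
    %r32 = and i32 %z1, %z2             ; candidate for GR32 -> VK32
    %r = trunc i32 %r32 to i16
    ret i16 %r
  }

If the VK32 virtual register survives to register allocation and gets spilled, there is no legal way to store it without AVX512BW, hence the crash.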

Another Intel employee managed to trigger a crash due to this with ISPC. Unfortunately, I've lost the test case he sent me at the time, and I'm trying to get him to reproduce it for me. I'd like to get this in before 8.0 branches since it's a little scary.

The regressions here are unfortunate, but I think we can make improvements to DAG combine, load folding, etc. to fix them. I'm just not sure we can get that done for 8.0.

Fixes PR39741

Reviewers: RKSimon, spatel

Reviewed By: RKSimon

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D56460

Modified:
    llvm/trunk/lib/Target/X86/X86DomainReassignment.cpp
    llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
    llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
    llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
    llvm/trunk/test/CodeGen/X86/vec_floor.ll

Modified: llvm/trunk/lib/Target/X86/X86DomainReassignment.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86DomainReassignment.cpp?rev=350800&r1=350799&r2=350800&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86DomainReassignment.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86DomainReassignment.cpp Wed Jan  9 23:43:54 2019
@@ -732,7 +732,10 @@ bool X86DomainReassignment::runOnMachine
   STI = &MF.getSubtarget<X86Subtarget>();
   // GPR->K is the only transformation currently supported, bail out early if no
   // AVX512.
-  if (!STI->hasAVX512())
+  // TODO: We're also bailing out if AVX512BW isn't supported, since we use
+  // VK32 and VK64 for GR32/GR64 but those aren't legal classes on KNL. If the
+  // register coalescer doesn't clean it up and we generate a spill, we crash.
+  if (!STI->hasAVX512() || !STI->hasBWI())
     return false;
 
   MRI = &MF.getRegInfo();

Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll?rev=350800&r1=350799&r2=350800&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll Wed Jan  9 23:43:54 2019
@@ -169,7 +169,8 @@ entry:
 define <16 x float> @test_mm512_mask_shuffle_f32x4(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; X86-LABEL: test_mm512_mask_shuffle_f32x4:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3,4,5,6,7],zmm2[0,1,2,3,0,1,2,3]
 ; X86-NEXT:    retl
 ;
@@ -188,7 +189,8 @@ entry:
 define <16 x float> @test_mm512_maskz_shuffle_f32x4(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; X86-LABEL: test_mm512_maskz_shuffle_f32x4:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,0,1,2,3]
 ; X86-NEXT:    retl
 ;
@@ -267,7 +269,8 @@ entry:
 define <8 x i64> @test_mm512_mask_shuffle_i32x4(<8 x i64> %__W, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
 ; X86-LABEL: test_mm512_mask_shuffle_i32x4:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3,4,5,6,7],zmm2[0,1,2,3,0,1,2,3]
 ; X86-NEXT:    retl
 ;
@@ -289,7 +292,8 @@ entry:
 define <8 x i64> @test_mm512_maskz_shuffle_i32x4(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
 ; X86-LABEL: test_mm512_maskz_shuffle_i32x4:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,0,1,2,3]
 ; X86-NEXT:    retl
 ;
@@ -377,7 +381,8 @@ entry:
 define zeroext i16 @test_mm512_mask_testn_epi32_mask(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_mask_testn_epi32_mask:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vptestnmd %zmm0, %zmm1, %k0 {%k1}
 ; X86-NEXT:    kmovw %k0, %eax
 ; X86-NEXT:    movzwl %ax, %eax
@@ -448,7 +453,8 @@ entry:
 define zeroext i16 @test_mm512_mask_test_epi32_mask(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_mask_test_epi32_mask:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vptestmd %zmm0, %zmm1, %k0 {%k1}
 ; X86-NEXT:    kmovw %k0, %eax
 ; X86-NEXT:    movzwl %ax, %eax
@@ -505,7 +511,8 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X86-LABEL: test_mm512_mask_set1_epi32:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vpbroadcastd %eax, %zmm0 {%k1}
 ; X86-NEXT:    retl
 ;
@@ -528,7 +535,8 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X86-LABEL: test_mm512_maskz_set1_epi32:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vpbroadcastd %eax, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
@@ -607,7 +615,8 @@ define <8 x i64> @test_mm512_broadcastd_
 define <8 x i64> @test_mm512_mask_broadcastd_epi32(<8 x i64> %a0, i16 %a1, <2 x i64> %a2) {
 ; X86-LABEL: test_mm512_mask_broadcastd_epi32:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpbroadcastd %xmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
 ;
@@ -628,7 +637,8 @@ define <8 x i64> @test_mm512_mask_broadc
 define <8 x i64> @test_mm512_maskz_broadcastd_epi32(i16 %a0, <2 x i64> %a1) {
 ; X86-LABEL: test_mm512_maskz_broadcastd_epi32:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpbroadcastd %xmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
@@ -751,7 +761,8 @@ define <16 x float> @test_mm512_broadcas
 define <16 x float> @test_mm512_mask_broadcastss_ps(<16 x float> %a0, i16 %a1, <4 x float> %a2) {
 ; X86-LABEL: test_mm512_mask_broadcastss_ps:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vbroadcastss %xmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
 ;
@@ -769,7 +780,8 @@ define <16 x float> @test_mm512_mask_bro
 define <16 x float> @test_mm512_maskz_broadcastss_ps(i16 %a0, <4 x float> %a1) {
 ; X86-LABEL: test_mm512_maskz_broadcastss_ps:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
@@ -843,7 +855,8 @@ define <16 x float> @test_mm512_movehdup
 define <16 x float> @test_mm512_mask_movehdup_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
 ; X86-LABEL: test_mm512_mask_movehdup_ps:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovshdup {{.*#+}} zmm0 {%k1} = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
 ; X86-NEXT:    retl
 ;
@@ -861,7 +874,8 @@ define <16 x float> @test_mm512_mask_mov
 define <16 x float> @test_mm512_maskz_movehdup_ps(i16 %a0, <16 x float> %a1) {
 ; X86-LABEL: test_mm512_maskz_movehdup_ps:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
 ; X86-NEXT:    retl
 ;
@@ -888,7 +902,8 @@ define <16 x float> @test_mm512_moveldup
 define <16 x float> @test_mm512_mask_moveldup_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
 ; X86-LABEL: test_mm512_mask_moveldup_ps:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovsldup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
 ; X86-NEXT:    retl
 ;
@@ -906,7 +921,8 @@ define <16 x float> @test_mm512_mask_mov
 define <16 x float> @test_mm512_maskz_moveldup_ps(i16 %a0, <16 x float> %a1) {
 ; X86-LABEL: test_mm512_maskz_moveldup_ps:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
 ; X86-NEXT:    retl
 ;
@@ -980,7 +996,8 @@ define <16 x float> @test_mm512_permute_
 define <16 x float> @test_mm512_mask_permute_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
 ; X86-LABEL: test_mm512_mask_permute_ps:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermilps {{.*#+}} zmm0 {%k1} = zmm1[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
 ; X86-NEXT:    retl
 ;
@@ -998,7 +1015,8 @@ define <16 x float> @test_mm512_mask_per
 define <16 x float> @test_mm512_maskz_permute_ps(i16 %a0, <16 x float> %a1) {
 ; X86-LABEL: test_mm512_maskz_permute_ps:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
 ; X86-NEXT:    retl
 ;
@@ -1121,7 +1139,8 @@ define <8 x i64> @test_mm512_shuffle_epi
 define <8 x i64> @test_mm512_mask_shuffle_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2) {
 ; X86-LABEL: test_mm512_mask_shuffle_epi32:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} = zmm1[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
 ; X86-NEXT:    retl
 ;
@@ -1142,7 +1161,8 @@ define <8 x i64> @test_mm512_mask_shuffl
 define <8 x i64> @test_mm512_maskz_shuffle_epi32(i16 %a0, <8 x i64> %a1) {
 ; X86-LABEL: test_mm512_maskz_shuffle_epi32:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
 ; X86-NEXT:    retl
 ;
@@ -1221,7 +1241,8 @@ define <8 x i64> @test_mm512_unpackhi_ep
 define <8 x i64> @test_mm512_mask_unpackhi_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2, <8 x i64> %a3) {
 ; X86-LABEL: test_mm512_mask_unpackhi_epi32:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpunpckhdq {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
 ; X86-NEXT:    retl
 ;
@@ -1243,7 +1264,8 @@ define <8 x i64> @test_mm512_mask_unpack
 define <8 x i64> @test_mm512_maskz_unpackhi_epi32(i16 %a0, <8 x i64> %a1, <8 x i64> %a2) {
 ; X86-LABEL: test_mm512_maskz_unpackhi_epi32:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpunpckhdq {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
 ; X86-NEXT:    retl
 ;
@@ -1367,7 +1389,8 @@ define <16 x float> @test_mm512_unpackhi
 define <16 x float> @test_mm512_mask_unpackhi_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2, <16 x float> %a3) {
 ; X86-LABEL: test_mm512_mask_unpackhi_ps:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
 ; X86-NEXT:    retl
 ;
@@ -1385,7 +1408,8 @@ define <16 x float> @test_mm512_mask_unp
 define <16 x float> @test_mm512_maskz_unpackhi_ps(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
 ; X86-LABEL: test_mm512_maskz_unpackhi_ps:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
 ; X86-NEXT:    retl
 ;
@@ -1415,7 +1439,8 @@ define <8 x i64> @test_mm512_unpacklo_ep
 define <8 x i64> @test_mm512_mask_unpacklo_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2, <8 x i64> %a3) {
 ; X86-LABEL: test_mm512_mask_unpacklo_epi32:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpunpckldq {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
 ; X86-NEXT:    retl
 ;
@@ -1437,7 +1462,8 @@ define <8 x i64> @test_mm512_mask_unpack
 define <8 x i64> @test_mm512_maskz_unpacklo_epi32(i16 %a0, <8 x i64> %a1, <8 x i64> %a2) {
 ; X86-LABEL: test_mm512_maskz_unpacklo_epi32:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpunpckldq {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; X86-NEXT:    retl
 ;
@@ -1561,7 +1587,8 @@ define <16 x float> @test_mm512_unpacklo
 define <16 x float> @test_mm512_mask_unpacklo_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2, <16 x float> %a3) {
 ; X86-LABEL: test_mm512_mask_unpacklo_ps:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
 ; X86-NEXT:    retl
 ;
@@ -1579,7 +1606,8 @@ define <16 x float> @test_mm512_mask_unp
 define <16 x float> @test_mm512_maskz_unpacklo_ps(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
 ; X86-LABEL: test_mm512_maskz_unpacklo_ps:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
 ; X86-NEXT:    retl
 ;
@@ -1978,7 +2006,8 @@ entry:
 define <2 x i64> @test_mm512_mask_cvtepi32_epi8(<2 x i64> %__O, i16 zeroext %__M, <8 x i64> %__A) {
 ; X86-LABEL: test_mm512_mask_cvtepi32_epi8:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmovdb %zmm1, %xmm0 {%k1}
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    retl
@@ -2000,7 +2029,8 @@ entry:
 define <2 x i64> @test_mm512_maskz_cvtepi32_epi8(i16 zeroext %__M, <8 x i64> %__A) {
 ; X86-LABEL: test_mm512_maskz_cvtepi32_epi8:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpmovdb %zmm0, %xmm0 {%k1} {z}
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    retl
@@ -2149,7 +2179,8 @@ declare <16 x i32> @llvm.x86.avx512.pter
 define <8 x i64> @test_mm512_mask_ternarylogic_epi32(<8 x i64> %__A, i16 zeroext %__U, <8 x i64> %__B, <8 x i64> %__C) {
 ; X86-LABEL: test_mm512_mask_ternarylogic_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpternlogd $4, %zmm2, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
 ;
@@ -2172,7 +2203,8 @@ entry:
 define <8 x i64> @test_mm512_maskz_ternarylogic_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C) {
 ; X86-LABEL: test_mm512_maskz_ternarylogic_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpternlogd $4, %zmm2, %zmm1, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
@@ -2249,7 +2281,8 @@ declare <16 x i32> @llvm.x86.avx512.vper
 define <8 x i64> @test_mm512_mask2_permutex2var_epi32(<8 x i64> %__A, <8 x i64> %__I, i16 zeroext %__U, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_mask2_permutex2var_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermi2d %zmm2, %zmm0, %zmm1 {%k1}
 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; X86-NEXT:    retl
@@ -2301,7 +2334,8 @@ declare <16 x float> @llvm.x86.avx512.vp
 define <16 x float> @test_mm512_mask2_permutex2var_ps(<16 x float> %__A, <8 x i64> %__I, i16 zeroext %__U, <16 x float> %__B) {
 ; X86-LABEL: test_mm512_mask2_permutex2var_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermi2ps %zmm2, %zmm0, %zmm1 {%k1}
 ; X86-NEXT:    vmovaps %zmm1, %zmm0
 ; X86-NEXT:    retl
@@ -2362,7 +2396,8 @@ entry:
 define <8 x i64> @test_mm512_maskz_permutex2var_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__I, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_maskz_permutex2var_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermt2d %zmm2, %zmm1, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
@@ -2385,7 +2420,8 @@ entry:
 define <8 x i64> @test_mm512_mask_permutex2var_epi32(<8 x i64> %__A, i16 zeroext %__U, <8 x i64> %__I, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_mask_permutex2var_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermt2d %zmm2, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
 ;
@@ -2469,7 +2505,8 @@ entry:
 define <16 x float> @test_mm512_mask_permutex2var_ps(<16 x float> %__A, i16 zeroext %__U, <8 x i64> %__I, <16 x float> %__B) {
 ; X86-LABEL: test_mm512_mask_permutex2var_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermt2ps %zmm2, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
 ;
@@ -2489,7 +2526,8 @@ entry:
 define <16 x float> @test_mm512_maskz_permutex2var_ps(i16 zeroext %__U, <16 x float> %__A, <8 x i64> %__I, <16 x float> %__B) {
 ; X86-LABEL: test_mm512_maskz_permutex2var_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpermt2ps %zmm2, %zmm1, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
@@ -3427,7 +3465,8 @@ declare <16 x float> @llvm.x86.avx512.vf
 define <16 x float> @test_mm512_mask_fmadd_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
 ; X86-LABEL: test_mm512_mask_fmadd_round_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
 ; X86-NEXT:    retl
 ;
@@ -3446,7 +3485,8 @@ entry:
 define <16 x float> @test_mm512_mask3_fmadd_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
 ; X86-LABEL: test_mm512_mask3_fmadd_round_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
 ; X86-NEXT:    vmovaps %zmm2, %zmm0
 ; X86-NEXT:    retl
@@ -3467,7 +3507,8 @@ entry:
 define <16 x float> @test_mm512_maskz_fmadd_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
 ; X86-LABEL: test_mm512_maskz_fmadd_round_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
@@ -3504,7 +3545,8 @@ entry:
 define <16 x float> @test_mm512_mask_fmsub_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
 ; X86-LABEL: test_mm512_mask_fmsub_round_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
 ; X86-NEXT:    retl
 ;
@@ -3524,7 +3566,8 @@ entry:
 define <16 x float> @test_mm512_maskz_fmsub_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
 ; X86-LABEL: test_mm512_maskz_fmsub_round_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
@@ -3562,7 +3605,8 @@ entry:
 define <16 x float> @test_mm512_mask3_fnmadd_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
 ; X86-LABEL: test_mm512_mask3_fnmadd_round_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
 ; X86-NEXT:    vmovaps %zmm2, %zmm0
 ; X86-NEXT:    retl
@@ -3584,7 +3628,8 @@ entry:
 define <16 x float> @test_mm512_maskz_fnmadd_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
 ; X86-LABEL: test_mm512_maskz_fnmadd_round_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
@@ -3619,7 +3664,8 @@ entry:
 define <16 x float> @test_mm512_maskz_fnmsub_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
 ; X86-LABEL: test_mm512_maskz_fnmsub_round_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
@@ -3650,7 +3696,8 @@ entry:
 define <16 x float> @test_mm512_mask_fmadd_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
 ; X86-LABEL: test_mm512_mask_fmadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm2
 ; X86-NEXT:    retl
 ;
@@ -3669,7 +3716,8 @@ entry:
 define <16 x float> @test_mm512_mask3_fmadd_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
 ; X86-LABEL: test_mm512_mask3_fmadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd231ps {{.*#+}} zmm2 = (zmm0 * zmm1) + zmm2
 ; X86-NEXT:    vmovaps %zmm2, %zmm0
 ; X86-NEXT:    retl
@@ -3690,7 +3738,8 @@ entry:
 define <16 x float> @test_mm512_maskz_fmadd_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
 ; X86-LABEL: test_mm512_maskz_fmadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
 ; X86-NEXT:    retl
 ;
@@ -3727,7 +3776,8 @@ entry:
 define <16 x float> @test_mm512_mask_fmsub_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
 ; X86-LABEL: test_mm512_mask_fmsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub132ps {{.*#+}} zmm0 = (zmm0 * zmm1) - zmm2
 ; X86-NEXT:    retl
 ;
@@ -3747,7 +3797,8 @@ entry:
 define <16 x float> @test_mm512_maskz_fmsub_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
 ; X86-LABEL: test_mm512_maskz_fmsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
 ; X86-NEXT:    retl
 ;
@@ -3785,7 +3836,8 @@ entry:
 define <16 x float> @test_mm512_mask3_fnmadd_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
 ; X86-LABEL: test_mm512_mask3_fnmadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd231ps {{.*#+}} zmm2 = -(zmm0 * zmm1) + zmm2
 ; X86-NEXT:    vmovaps %zmm2, %zmm0
 ; X86-NEXT:    retl
@@ -3807,7 +3859,8 @@ entry:
 define <16 x float> @test_mm512_maskz_fnmadd_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
 ; X86-LABEL: test_mm512_maskz_fnmadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
 ; X86-NEXT:    retl
 ;
@@ -3842,7 +3895,8 @@ entry:
 define <16 x float> @test_mm512_maskz_fnmsub_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
 ; X86-LABEL: test_mm512_maskz_fnmsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
 ; X86-NEXT:    retl
 ;
@@ -4152,7 +4206,8 @@ declare <16 x float> @llvm.x86.avx512.vf
 define <16 x float> @test_mm512_mask_fmaddsub_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
 ; X86-LABEL: test_mm512_mask_fmaddsub_round_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmaddsub132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
 ; X86-NEXT:    retl
 ;
@@ -4171,7 +4226,8 @@ entry:
 define <16 x float> @test_mm512_mask3_fmaddsub_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
 ; X86-LABEL: test_mm512_mask3_fmaddsub_round_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmaddsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
 ; X86-NEXT:    vmovaps %zmm2, %zmm0
 ; X86-NEXT:    retl
@@ -4192,7 +4248,8 @@ entry:
 define <16 x float> @test_mm512_maskz_fmaddsub_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
 ; X86-LABEL: test_mm512_maskz_fmaddsub_round_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
@@ -4229,7 +4286,8 @@ entry:
 define <16 x float> @test_mm512_mask_fmsubadd_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
 ; X86-LABEL: test_mm512_mask_fmsubadd_round_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsubadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
 ; X86-NEXT:    retl
 ;
@@ -4249,7 +4307,8 @@ entry:
 define <16 x float> @test_mm512_maskz_fmsubadd_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
 ; X86-LABEL: test_mm512_maskz_fmsubadd_round_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsubadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
@@ -4282,7 +4341,8 @@ entry:
 define <16 x float> @test_mm512_mask_fmaddsub_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
 ; X86-LABEL: test_mm512_mask_fmaddsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmaddsub132ps {{.*#+}} zmm0 = (zmm0 * zmm1) +/- zmm2
 ; X86-NEXT:    retl
 ;
@@ -4304,7 +4364,8 @@ entry:
 define <16 x float> @test_mm512_mask3_fmaddsub_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
 ; X86-LABEL: test_mm512_mask3_fmaddsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmaddsub231ps {{.*#+}} zmm2 = (zmm0 * zmm1) +/- zmm2
 ; X86-NEXT:    vmovaps %zmm2, %zmm0
 ; X86-NEXT:    retl
@@ -4328,7 +4389,8 @@ entry:
 define <16 x float> @test_mm512_maskz_fmaddsub_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
 ; X86-LABEL: test_mm512_maskz_fmaddsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmaddsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2
 ; X86-NEXT:    retl
 ;
@@ -4363,7 +4425,8 @@ entry:
 define <16 x float> @test_mm512_mask_fmsubadd_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
 ; X86-LABEL: test_mm512_mask_fmsubadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsubadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) -/+ zmm2
 ; X86-NEXT:    retl
 ;
@@ -4385,7 +4448,8 @@ entry:
 define <16 x float> @test_mm512_maskz_fmsubadd_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
 ; X86-LABEL: test_mm512_maskz_fmsubadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsubadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) -/+ zmm2
 ; X86-NEXT:    retl
 ;
@@ -4453,7 +4517,8 @@ entry:
 define <16 x float> @test_mm512_mask3_fmsub_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
 ; X86-LABEL: test_mm512_mask3_fmsub_round_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
 ; X86-NEXT:    vmovaps %zmm2, %zmm0
 ; X86-NEXT:    retl
@@ -4475,7 +4540,8 @@ entry:
 define <16 x float> @test_mm512_mask3_fmsub_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
 ; X86-LABEL: test_mm512_mask3_fmsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsub231ps {{.*#+}} zmm2 = (zmm0 * zmm1) - zmm2
 ; X86-NEXT:    vmovaps %zmm2, %zmm0
 ; X86-NEXT:    retl
@@ -4545,7 +4611,8 @@ entry:
 define <16 x float> @test_mm512_mask3_fmsubadd_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
 ; X86-LABEL: test_mm512_mask3_fmsubadd_round_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsubadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
 ; X86-NEXT:    vmovaps %zmm2, %zmm0
 ; X86-NEXT:    retl
@@ -4567,7 +4634,8 @@ entry:
 define <16 x float> @test_mm512_mask3_fmsubadd_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
 ; X86-LABEL: test_mm512_mask3_fmsubadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfmsubadd231ps {{.*#+}} zmm2 = (zmm0 * zmm1) -/+ zmm2
 ; X86-NEXT:    vmovaps %zmm2, %zmm0
 ; X86-NEXT:    retl
@@ -4633,7 +4701,8 @@ entry:
 define <16 x float> @test_mm512_mask_fnmadd_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
 ; X86-LABEL: test_mm512_mask_fnmadd_round_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
 ; X86-NEXT:    retl
 ;
@@ -4653,7 +4722,8 @@ entry:
 define <16 x float> @test_mm512_mask_fnmadd_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
 ; X86-LABEL: test_mm512_mask_fnmadd_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmadd132ps {{.*#+}} zmm0 = -(zmm0 * zmm1) + zmm2
 ; X86-NEXT:    retl
 ;
@@ -4765,7 +4835,8 @@ entry:
 define <16 x float> @test_mm512_mask_fnmsub_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
 ; X86-LABEL: test_mm512_mask_fnmsub_round_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
 ; X86-NEXT:    retl
 ;
@@ -4786,7 +4857,8 @@ entry:
 define <16 x float> @test_mm512_mask3_fnmsub_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
 ; X86-LABEL: test_mm512_mask3_fnmsub_round_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
 ; X86-NEXT:    vmovaps %zmm2, %zmm0
 ; X86-NEXT:    retl
@@ -4809,7 +4881,8 @@ entry:
 define <16 x float> @test_mm512_mask_fnmsub_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
 ; X86-LABEL: test_mm512_mask_fnmsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub132ps {{.*#+}} zmm0 = -(zmm0 * zmm1) - zmm2
 ; X86-NEXT:    retl
 ;
@@ -4830,7 +4903,8 @@ entry:
 define <16 x float> @test_mm512_mask3_fnmsub_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
 ; X86-LABEL: test_mm512_mask3_fnmsub_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vfnmsub231ps {{.*#+}} zmm2 = -(zmm0 * zmm1) - zmm2
 ; X86-NEXT:    vmovaps %zmm2, %zmm0
 ; X86-NEXT:    retl
@@ -6252,7 +6326,8 @@ define <8 x i64> @test_mm512_mask_expand
 ; X86-LABEL: test_mm512_mask_expandloadu_epi32:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vpexpandd (%eax), %zmm0 {%k1}
 ; X86-NEXT:    retl
 ;
@@ -6274,7 +6349,8 @@ define <8 x i64> @test_mm512_maskz_expan
 ; X86-LABEL: test_mm512_maskz_expandloadu_epi32:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vpexpandd (%eax), %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
@@ -6295,7 +6371,8 @@ define <16 x float> @test_mm512_mask_exp
 ; X86-LABEL: test_mm512_mask_expandloadu_ps:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vexpandps (%eax), %zmm0 {%k1}
 ; X86-NEXT:    retl
 ;
@@ -6315,7 +6392,8 @@ define <16 x float> @test_mm512_maskz_ex
 ; X86-LABEL: test_mm512_maskz_expandloadu_ps:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    kmovw %ecx, %k1
 ; X86-NEXT:    vexpandps (%eax), %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
@@ -6380,9 +6458,10 @@ entry:
 define void @test_mm512_mask_compressstoreu_ps(i8* %__P, i16 zeroext %__U, <16 x float> %__A) {
 ; X86-LABEL: test_mm512_mask_compressstoreu_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    vcompressps %zmm0, (%eax) {%k1}
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    kmovw %eax, %k1
+; X86-NEXT:    vcompressps %zmm0, (%ecx) {%k1}
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    retl
 ;
@@ -6402,9 +6481,10 @@ entry:
 define void @test_mm512_mask_compressstoreu_epi32(i8* %__P, i16 zeroext %__U, <8 x i64> %__A) {
 ; X86-LABEL: test_mm512_mask_compressstoreu_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    vpcompressd %zmm0, (%eax) {%k1}
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    kmovw %eax, %k1
+; X86-NEXT:    vpcompressd %zmm0, (%ecx) {%k1}
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    retl
 ;
@@ -6973,7 +7053,8 @@ entry:
 define i32 @test_mm512_mask_reduce_add_epi32(i16 zeroext %__M, <8 x i64> %__W) {
 ; X86-LABEL: test_mm512_mask_reduce_add_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; X86-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
@@ -7029,7 +7110,8 @@ entry:
 define i32 @test_mm512_mask_reduce_mul_epi32(i16 zeroext %__M, <8 x i64> %__W) {
 ; X86-LABEL: test_mm512_mask_reduce_mul_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpbroadcastd {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
 ; X86-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
 ; X86-NEXT:    vextracti64x4 $1, %zmm1, %ymm0
@@ -7087,7 +7169,8 @@ entry:
 define i32 @test_mm512_mask_reduce_and_epi32(i16 zeroext %__M, <8 x i64> %__W) {
 ; X86-LABEL: test_mm512_mask_reduce_and_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
 ; X86-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
 ; X86-NEXT:    vextracti64x4 $1, %zmm1, %ymm0
@@ -7141,7 +7224,8 @@ entry:
 define i32 @test_mm512_mask_reduce_or_epi32(i16 zeroext %__M, <8 x i64> %__W) {
 ; X86-LABEL: test_mm512_mask_reduce_or_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; X86-NEXT:    vpor %ymm1, %ymm0, %ymm0
@@ -7499,7 +7583,8 @@ define float @test_mm512_mask_reduce_add
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    pushl %eax
 ; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
 ; X86-NEXT:    vaddps %ymm1, %ymm0, %ymm0
@@ -7555,7 +7640,8 @@ define float @test_mm512_mask_reduce_mul
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    pushl %eax
 ; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vbroadcastss {{.*#+}} zmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
 ; X86-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
 ; X86-NEXT:    vextractf64x4 $1, %zmm1, %ymm0
@@ -8415,7 +8501,8 @@ entry:
 define i32 @test_mm512_mask_reduce_max_epi32(i16 zeroext %__M, <8 x i64> %__W) {
 ; X86-LABEL: test_mm512_mask_reduce_max_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpbroadcastd {{.*#+}} zmm1 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
 ; X86-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
 ; X86-NEXT:    vextracti64x4 $1, %zmm1, %ymm0
@@ -8477,7 +8564,8 @@ entry:
 define i32 @test_mm512_mask_reduce_max_epu32(i16 zeroext %__M, <8 x i64> %__W) {
 ; X86-LABEL: test_mm512_mask_reduce_max_epu32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; X86-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
@@ -8539,7 +8627,8 @@ define float @test_mm512_mask_reduce_max
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    pushl %eax
 ; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vbroadcastss {{.*#+}} zmm1 = [-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf]
 ; X86-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
 ; X86-NEXT:    vextractf64x4 $1, %zmm1, %ymm0
@@ -8595,7 +8684,8 @@ entry:
 define i32 @test_mm512_mask_reduce_min_epi32(i16 zeroext %__M, <8 x i64> %__W) {
 ; X86-LABEL: test_mm512_mask_reduce_min_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpbroadcastd {{.*#+}} zmm1 = [2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647]
 ; X86-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
 ; X86-NEXT:    vextracti64x4 $1, %zmm1, %ymm0
@@ -8657,7 +8747,8 @@ entry:
 define i32 @test_mm512_mask_reduce_min_epu32(i16 zeroext %__M, <8 x i64> %__W) {
 ; X86-LABEL: test_mm512_mask_reduce_min_epu32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
 ; X86-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
 ; X86-NEXT:    vextracti64x4 $1, %zmm1, %ymm0
@@ -8721,7 +8812,8 @@ define float @test_mm512_mask_reduce_min
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    pushl %eax
 ; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vbroadcastss {{.*#+}} zmm1 = [+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf]
 ; X86-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
 ; X86-NEXT:    vextractf64x4 $1, %zmm1, %ymm0
@@ -8817,7 +8909,8 @@ entry:
 define <16 x float> @test_mm512_mask_max_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; X86-LABEL: test_mm512_mask_max_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmaxps %zmm2, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
 ;
@@ -8888,7 +8981,8 @@ entry:
 define <16 x float> @test_mm512_maskz_max_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; X86-LABEL: test_mm512_maskz_max_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
@@ -8907,7 +9001,8 @@ entry:
 define <16 x float> @test_mm512_mask_max_round_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; X86-LABEL: test_mm512_mask_max_round_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmaxps %zmm2, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
 ;
@@ -8928,7 +9023,8 @@ declare <16 x float> @llvm.x86.avx512.ma
 define <16 x float> @test_mm512_maskz_max_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; X86-LABEL: test_mm512_maskz_max_round_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
@@ -9049,7 +9145,8 @@ entry:
 define <16 x float> @test_mm512_mask_min_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; X86-LABEL: test_mm512_mask_min_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vminps %zmm2, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
 ;
@@ -9068,7 +9165,8 @@ entry:
 define <16 x float> @test_mm512_maskz_min_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; X86-LABEL: test_mm512_maskz_min_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
@@ -9087,7 +9185,8 @@ entry:
 define <16 x float> @test_mm512_mask_min_round_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; X86-LABEL: test_mm512_mask_min_round_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vminps %zmm2, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
 ;
@@ -9108,7 +9207,8 @@ declare <16 x float> @llvm.x86.avx512.mi
 define <16 x float> @test_mm512_maskz_min_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
 ; X86-LABEL: test_mm512_maskz_min_round_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
@@ -9249,7 +9349,8 @@ entry:
 define <16 x float> @test_mm512_mask_sqrt_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A) {
 ; X86-LABEL: test_mm512_mask_sqrt_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vsqrtps %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
 ;
@@ -9268,7 +9369,8 @@ entry:
 define <16 x float> @test_mm512_maskz_sqrt_ps(i16 zeroext %__U, <16 x float> %__A) {
 ; X86-LABEL: test_mm512_maskz_sqrt_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vsqrtps %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
@@ -9287,7 +9389,8 @@ entry:
 define <16 x float> @test_mm512_mask_sqrt_round_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A) {
 ; X86-LABEL: test_mm512_mask_sqrt_round_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vsqrtps {rn-sae}, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
 ;
@@ -9308,7 +9411,8 @@ declare <16 x float> @llvm.x86.avx512.sq
 define <16 x float> @test_mm512_maskz_sqrt_round_ps(i16 zeroext %__U, <16 x float> %__A) {
 ; X86-LABEL: test_mm512_maskz_sqrt_round_ps:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vsqrtps {rn-sae}, %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
@@ -9349,7 +9453,8 @@ entry:
 define <8 x i64> @test_mm512_mask_rol_epi32(<8 x i64> %__W, i16 zeroext %__U, <8 x i64> %__A) {
 ; X86-LABEL: test_mm512_mask_rol_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprold $5, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
 ;
@@ -9371,7 +9476,8 @@ entry:
 define <8 x i64> @test_mm512_maskz_rol_epi32(i16 zeroext %__U, <8 x i64> %__A) {
 ; X86-LABEL: test_mm512_maskz_rol_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprold $5, %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
@@ -9455,7 +9561,8 @@ entry:
 define <8 x i64> @test_mm512_mask_rolv_epi32(<8 x i64> %__W, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_mask_rolv_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprolvd %zmm2, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
 ;
@@ -9478,7 +9585,8 @@ entry:
 define <8 x i64> @test_mm512_maskz_rolv_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_maskz_rolv_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprolvd %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
@@ -9563,7 +9671,8 @@ entry:
 define <8 x i64> @test_mm512_mask_ror_epi32(<8 x i64> %__W, i16 zeroext %__U, <8 x i64> %__A) {
 ; X86-LABEL: test_mm512_mask_ror_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprord $5, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
 ;
@@ -9585,7 +9694,8 @@ entry:
 define <8 x i64> @test_mm512_maskz_ror_epi32(i16 zeroext %__U, <8 x i64> %__A) {
 ; X86-LABEL: test_mm512_maskz_ror_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprord $5, %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
@@ -9669,7 +9779,8 @@ entry:
 define <8 x i64> @test_mm512_mask_rorv_epi32(<8 x i64> %__W, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_mask_rorv_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprorvd %zmm2, %zmm1, %zmm0 {%k1}
 ; X86-NEXT:    retl
 ;
@@ -9692,7 +9803,8 @@ entry:
 define <8 x i64> @test_mm512_maskz_rorv_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
 ; X86-LABEL: test_mm512_maskz_rorv_epi32:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    kmovw %eax, %k1
 ; X86-NEXT:    vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;

Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=350800&r1=350799&r2=350800&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Wed Jan  9 23:43:54 2019
@@ -1278,19 +1278,23 @@ define <64 x i8> @test17(i64 %x, i32 %y,
 define <8 x i1> @test18(i8 %a, i16 %y) {
 ; KNL-LABEL: test18:
 ; KNL:       ## %bb.0:
-; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    kmovw %esi, %k2
-; KNL-NEXT:    kshiftrw $8, %k2, %k0
-; KNL-NEXT:    kshiftrw $9, %k2, %k2
-; KNL-NEXT:    kshiftrw $6, %k1, %k3
-; KNL-NEXT:    kxorw %k2, %k3, %k2
-; KNL-NEXT:    kshiftlw $15, %k2, %k2
-; KNL-NEXT:    kshiftrw $9, %k2, %k2
+; KNL-NEXT:    kmovw %edi, %k0
+; KNL-NEXT:    kmovw %esi, %k1
+; KNL-NEXT:    kshiftrw $8, %k1, %k2
+; KNL-NEXT:    kmovw %k2, %eax
+; KNL-NEXT:    kshiftrw $9, %k1, %k1
+; KNL-NEXT:    kmovw %k1, %ecx
+; KNL-NEXT:    kshiftrw $6, %k0, %k1
+; KNL-NEXT:    kmovw %ecx, %k2
 ; KNL-NEXT:    kxorw %k2, %k1, %k1
-; KNL-NEXT:    kshiftlw $9, %k1, %k1
+; KNL-NEXT:    kshiftlw $15, %k1, %k1
 ; KNL-NEXT:    kshiftrw $9, %k1, %k1
-; KNL-NEXT:    kshiftlw $7, %k0, %k0
-; KNL-NEXT:    korw %k0, %k1, %k1
+; KNL-NEXT:    kxorw %k1, %k0, %k0
+; KNL-NEXT:    kshiftlw $9, %k0, %k0
+; KNL-NEXT:    kshiftrw $9, %k0, %k0
+; KNL-NEXT:    kmovw %eax, %k1
+; KNL-NEXT:    kshiftlw $7, %k1, %k1
+; KNL-NEXT:    korw %k1, %k0, %k1
 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT:    vpmovdw %zmm0, %ymm0
 ; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $ymm0
@@ -1337,19 +1341,23 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
 ;
 ; AVX512DQ-LABEL: test18:
 ; AVX512DQ:       ## %bb.0:
-; AVX512DQ-NEXT:    kmovw %edi, %k1
-; AVX512DQ-NEXT:    kmovw %esi, %k2
-; AVX512DQ-NEXT:    kshiftrw $8, %k2, %k0
-; AVX512DQ-NEXT:    kshiftrw $9, %k2, %k2
-; AVX512DQ-NEXT:    kshiftrb $6, %k1, %k3
-; AVX512DQ-NEXT:    kxorb %k2, %k3, %k2
-; AVX512DQ-NEXT:    kshiftlb $7, %k2, %k2
-; AVX512DQ-NEXT:    kshiftrb $1, %k2, %k2
+; AVX512DQ-NEXT:    kmovw %edi, %k0
+; AVX512DQ-NEXT:    kmovw %esi, %k1
+; AVX512DQ-NEXT:    kshiftrw $8, %k1, %k2
+; AVX512DQ-NEXT:    kmovw %k2, %eax
+; AVX512DQ-NEXT:    kshiftrw $9, %k1, %k1
+; AVX512DQ-NEXT:    kmovw %k1, %ecx
+; AVX512DQ-NEXT:    kshiftrb $6, %k0, %k1
+; AVX512DQ-NEXT:    kmovw %ecx, %k2
 ; AVX512DQ-NEXT:    kxorb %k2, %k1, %k1
-; AVX512DQ-NEXT:    kshiftlb $1, %k1, %k1
+; AVX512DQ-NEXT:    kshiftlb $7, %k1, %k1
 ; AVX512DQ-NEXT:    kshiftrb $1, %k1, %k1
-; AVX512DQ-NEXT:    kshiftlb $7, %k0, %k0
-; AVX512DQ-NEXT:    korb %k0, %k1, %k0
+; AVX512DQ-NEXT:    kxorb %k1, %k0, %k0
+; AVX512DQ-NEXT:    kshiftlb $1, %k0, %k0
+; AVX512DQ-NEXT:    kshiftrb $1, %k0, %k0
+; AVX512DQ-NEXT:    kmovw %eax, %k1
+; AVX512DQ-NEXT:    kshiftlb $7, %k1, %k1
+; AVX512DQ-NEXT:    korb %k1, %k0, %k0
 ; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
 ; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
 ; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $ymm0
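
In test18 the regression takes a different shape: intermediate mask values that previously stayed in k-registers across the kshift/kxor sequence now round-trip through GPRs (kmovw %k2, %eax ... kmovw %eax, %k2). A hypothetical reduction of the pattern, not copied from the test file (the extract/insert lanes are guesses matched to the shift amounts visible above):

    define <8 x i1> @insert_bit_sketch(i8 %a, i16 %y) {
      ; Moving a single bit between mask vectors is what forces the
      ; kshift/kxor sequence in the checked assembly.
      %m8  = bitcast i8 %a to <8 x i1>
      %m16 = bitcast i16 %y to <16 x i1>
      %bit = extractelement <16 x i1> %m16, i32 9
      %res = insertelement <8 x i1> %m8, i1 %bit, i32 6
      ret <8 x i1> %res
    }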

Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll?rev=350800&r1=350799&r2=350800&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll Wed Jan  9 23:43:54 2019
@@ -1617,7 +1617,8 @@ declare <16 x i8> @llvm.x86.avx512.mask.
 define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qb_128:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vpmovqb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x32,0xc2]
 ; X86-NEXT:    vpmovqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x32,0xc1]
 ; X86-NEXT:    vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
@@ -1670,7 +1671,8 @@ declare <16 x i8> @llvm.x86.avx512.mask.
 define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qb_128:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vpmovsqb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x22,0xc2]
 ; X86-NEXT:    vpmovsqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x22,0xc1]
 ; X86-NEXT:    vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
@@ -1723,7 +1725,8 @@ declare <16 x i8> @llvm.x86.avx512.mask.
 define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qb_128:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vpmovusqb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x12,0xc2]
 ; X86-NEXT:    vpmovusqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x12,0xc1]
 ; X86-NEXT:    vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
@@ -1776,7 +1779,8 @@ declare <16 x i8> @llvm.x86.avx512.mask.
 define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_pmov_qb_256:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vpmovqb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x32,0xc2]
 ; X86-NEXT:    vpmovqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x32,0xc1]
 ; X86-NEXT:    vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
@@ -1833,7 +1837,8 @@ declare <16 x i8> @llvm.x86.avx512.mask.
 define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_qb_256:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vpmovsqb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x22,0xc2]
 ; X86-NEXT:    vpmovsqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x22,0xc1]
 ; X86-NEXT:    vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
@@ -1890,7 +1895,8 @@ declare <16 x i8> @llvm.x86.avx512.mask.
 define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_qb_256:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vpmovusqb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x12,0xc2]
 ; X86-NEXT:    vpmovusqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x12,0xc1]
 ; X86-NEXT:    vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
@@ -2619,7 +2625,8 @@ declare <16 x i8> @llvm.x86.avx512.mask.
 define <16 x i8>@test_int_x86_avx512_mask_pmov_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_pmov_db_128:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vpmovdb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x31,0xc2]
 ; X86-NEXT:    vpmovdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x31,0xc1]
 ; X86-NEXT:    vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
@@ -2672,7 +2679,8 @@ declare <16 x i8> @llvm.x86.avx512.mask.
 define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_db_128:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vpmovsdb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x21,0xc2]
 ; X86-NEXT:    vpmovsdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x21,0xc1]
 ; X86-NEXT:    vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
@@ -2725,7 +2733,8 @@ declare <16 x i8> @llvm.x86.avx512.mask.
 define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_db_128:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vpmovusdb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x11,0xc2]
 ; X86-NEXT:    vpmovusdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x11,0xc1]
 ; X86-NEXT:    vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
@@ -2778,7 +2787,8 @@ declare <16 x i8> @llvm.x86.avx512.mask.
 define <16 x i8>@test_int_x86_avx512_mask_pmov_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_pmov_db_256:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vpmovdb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x31,0xc2]
 ; X86-NEXT:    vpmovdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x31,0xc1]
 ; X86-NEXT:    vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
@@ -2835,7 +2845,8 @@ declare <16 x i8> @llvm.x86.avx512.mask.
 define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_db_256:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vpmovsdb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x21,0xc2]
 ; X86-NEXT:    vpmovsdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x21,0xc1]
 ; X86-NEXT:    vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
@@ -2892,7 +2903,8 @@ declare <16 x i8> @llvm.x86.avx512.mask.
 define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_db_256:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vpmovusdb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x11,0xc2]
 ; X86-NEXT:    vpmovusdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x11,0xc1]
 ; X86-NEXT:    vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
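
The avx512vl hunks repeat the movzwl+kmovw pattern for the i8 mask arguments on 32-bit targets. A hypothetical standalone version of one masked-truncate test, assuming the intrinsic keeps the signature shown in the truncated declarations above:

    declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64>, <16 x i8>, i8)

    define <16 x i8> @pmov_qb_sketch(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
      ; The low bits of %x2 form the write-mask for the qword-to-byte truncate.
      %res = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2)
      ret <16 x i8> %res
    }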

Modified: llvm/trunk/test/CodeGen/X86/vec_floor.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_floor.ll?rev=350800&r1=350799&r2=350800&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_floor.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_floor.ll Wed Jan  9 23:43:54 2019
@@ -1694,7 +1694,8 @@ define <4 x float> @floor_mask_ss_mask8(
 ; AVX512F:       ## %bb.0:
 ; AVX512F-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
+; AVX512F-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %k1
 ; AVX512F-NEXT:    vrndscaless $1, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512F-NEXT:    vmovaps %xmm2, %xmm0
 ; AVX512F-NEXT:    vzeroupper
@@ -1702,7 +1703,8 @@ define <4 x float> @floor_mask_ss_mask8(
 ;
 ; AVX512VL-LABEL: floor_mask_ss_mask8:
 ; AVX512VL:       ## %bb.0:
-; AVX512VL-NEXT:    vcmpeqps %xmm1, %xmm0, %k1
+; AVX512VL-NEXT:    vcmpeqps %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovw %k0, %k1
 ; AVX512VL-NEXT:    vrndscaless $1, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512VL-NEXT:    vmovaps %xmm2, %xmm0
 ; AVX512VL-NEXT:    retq
@@ -1753,14 +1755,16 @@ define <4 x float> @floor_maskz_ss_mask8
 ; AVX512F:       ## %bb.0:
 ; AVX512F-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
+; AVX512F-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %k1
 ; AVX512F-NEXT:    vrndscaless $1, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: floor_maskz_ss_mask8:
 ; AVX512VL:       ## %bb.0:
-; AVX512VL-NEXT:    vcmpeqps %xmm1, %xmm0, %k1
+; AVX512VL-NEXT:    vcmpeqps %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovw %k0, %k1
 ; AVX512VL-NEXT:    vrndscaless $1, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512VL-NEXT:    retq
   %mask1 = fcmp oeq <4 x float> %x, %y
@@ -1804,7 +1808,8 @@ define <2 x double> @floor_mask_sd_mask8
 ; AVX512F:       ## %bb.0:
 ; AVX512F-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
+; AVX512F-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %k1
 ; AVX512F-NEXT:    vrndscalesd $1, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512F-NEXT:    vmovapd %xmm2, %xmm0
 ; AVX512F-NEXT:    vzeroupper
@@ -1812,7 +1817,8 @@ define <2 x double> @floor_mask_sd_mask8
 ;
 ; AVX512VL-LABEL: floor_mask_sd_mask8:
 ; AVX512VL:       ## %bb.0:
-; AVX512VL-NEXT:    vcmpeqpd %xmm1, %xmm0, %k1
+; AVX512VL-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovw %k0, %k1
 ; AVX512VL-NEXT:    vrndscalesd $1, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512VL-NEXT:    vmovapd %xmm2, %xmm0
 ; AVX512VL-NEXT:    retq
@@ -1863,14 +1869,16 @@ define <2 x double> @floor_maskz_sd_mask
 ; AVX512F:       ## %bb.0:
 ; AVX512F-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
+; AVX512F-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %k1
 ; AVX512F-NEXT:    vrndscalesd $1, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: floor_maskz_sd_mask8:
 ; AVX512VL:       ## %bb.0:
-; AVX512VL-NEXT:    vcmpeqpd %xmm1, %xmm0, %k1
+; AVX512VL-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovw %k0, %k1
 ; AVX512VL-NEXT:    vrndscalesd $1, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512VL-NEXT:    retq
   %mask1 = fcmp oeq <2 x double> %x, %y
@@ -2758,7 +2766,8 @@ define <4 x float> @ceil_mask_ss_mask8(<
 ; AVX512F:       ## %bb.0:
 ; AVX512F-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
+; AVX512F-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %k1
 ; AVX512F-NEXT:    vrndscaless $2, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512F-NEXT:    vmovaps %xmm2, %xmm0
 ; AVX512F-NEXT:    vzeroupper
@@ -2766,7 +2775,8 @@ define <4 x float> @ceil_mask_ss_mask8(<
 ;
 ; AVX512VL-LABEL: ceil_mask_ss_mask8:
 ; AVX512VL:       ## %bb.0:
-; AVX512VL-NEXT:    vcmpeqps %xmm1, %xmm0, %k1
+; AVX512VL-NEXT:    vcmpeqps %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovw %k0, %k1
 ; AVX512VL-NEXT:    vrndscaless $2, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512VL-NEXT:    vmovaps %xmm2, %xmm0
 ; AVX512VL-NEXT:    retq
@@ -2817,14 +2827,16 @@ define <4 x float> @ceil_maskz_ss_mask8(
 ; AVX512F:       ## %bb.0:
 ; AVX512F-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
+; AVX512F-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %k1
 ; AVX512F-NEXT:    vrndscaless $2, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: ceil_maskz_ss_mask8:
 ; AVX512VL:       ## %bb.0:
-; AVX512VL-NEXT:    vcmpeqps %xmm1, %xmm0, %k1
+; AVX512VL-NEXT:    vcmpeqps %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovw %k0, %k1
 ; AVX512VL-NEXT:    vrndscaless $2, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512VL-NEXT:    retq
   %mask1 = fcmp oeq <4 x float> %x, %y
@@ -2868,7 +2880,8 @@ define <2 x double> @ceil_mask_sd_mask8(
 ; AVX512F:       ## %bb.0:
 ; AVX512F-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
+; AVX512F-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %k1
 ; AVX512F-NEXT:    vrndscalesd $2, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512F-NEXT:    vmovapd %xmm2, %xmm0
 ; AVX512F-NEXT:    vzeroupper
@@ -2876,7 +2889,8 @@ define <2 x double> @ceil_mask_sd_mask8(
 ;
 ; AVX512VL-LABEL: ceil_mask_sd_mask8:
 ; AVX512VL:       ## %bb.0:
-; AVX512VL-NEXT:    vcmpeqpd %xmm1, %xmm0, %k1
+; AVX512VL-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovw %k0, %k1
 ; AVX512VL-NEXT:    vrndscalesd $2, %xmm0, %xmm1, %xmm2 {%k1}
 ; AVX512VL-NEXT:    vmovapd %xmm2, %xmm0
 ; AVX512VL-NEXT:    retq
@@ -2927,14 +2941,16 @@ define <2 x double> @ceil_maskz_sd_mask8
 ; AVX512F:       ## %bb.0:
 ; AVX512F-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
+; AVX512F-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %k1
 ; AVX512F-NEXT:    vrndscalesd $2, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: ceil_maskz_sd_mask8:
 ; AVX512VL:       ## %bb.0:
-; AVX512VL-NEXT:    vcmpeqpd %xmm1, %xmm0, %k1
+; AVX512VL-NEXT:    vcmpeqpd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovw %k0, %k1
 ; AVX512VL-NEXT:    vrndscalesd $2, %xmm0, %xmm1, %xmm0 {%k1} {z}
 ; AVX512VL-NEXT:    retq
   %mask1 = fcmp oeq <2 x double> %x, %y
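
In vec_floor.ll the cost is a single extra mask-register copy: the compare now writes %k0 and is copied to %k1 (kmovw %k0, %k1) before gating the rndscale, where it previously targeted %k1 directly. A hypothetical reduction of the floor_mask_ss_mask8 shape in generic IR (the real test may also combine the compare with an i8 mask argument, which this sketch omits):

    declare float @llvm.floor.f32(float)

    define <4 x float> @floor_mask_sketch(<4 x float> %x, <4 x float> %y, <4 x float> %w) {
      ; Lane 0 of the compare gates a scalar floor merged into %w.
      %cmp = fcmp oeq <4 x float> %x, %y
      %bit = extractelement <4 x i1> %cmp, i32 0
      %x0  = extractelement <4 x float> %x, i32 0
      %f   = call float @llvm.floor.f32(float %x0)
      %w0  = extractelement <4 x float> %w, i32 0
      %sel = select i1 %bit, float %f, float %w0
      %res = insertelement <4 x float> %w, float %sel, i32 0
      ret <4 x float> %res
    }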
