[llvm] r350800 - [X86] Disable DomainReassignment pass when AVX512BW is disabled to avoid injecting VK32/VK64 references into the MachineIR
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 9 23:43:54 PST 2019
Author: ctopper
Date: Wed Jan 9 23:43:54 2019
New Revision: 350800
URL: http://llvm.org/viewvc/llvm-project?rev=350800&view=rev
Log:
[X86] Disable DomainReassignment pass when AVX512BW is disabled to avoid injecting VK32/VK64 references into the MachineIR
Summary:
This pass replaces GR8/GR16/GR32/GR64 registers with their equivalent-sized mask register classes. But VK32/VK64 aren't legal without AVX512BW. In practice this mostly works if the register coalescer is able to remove the VK32/VK64 register class reference, or if we never spill it, but there's no guarantee of that.
Another Intel employee managed to trigger a crash from this with ISPC. Unfortunately, I've lost the test case he sent me at the time, and I'm trying to get him to reproduce it for me. I'd like to get this in before 8.0 branches since it's a little scary.
The regressions here are unfortunate, but I think we can make some improvements to DAG combine, load folding, etc. to fix them. I'm just not sure we can get that done for 8.0.
Fixes PR39741
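For readers unfamiliar with the pass: it rewrites closures of GPR instructions into mask (k) register instructions, picking the mask register class of the same width as the GPR. The standalone C++ sketch below (hypothetical names, not the LLVM API) only models the legality gap this patch closes: the 32/64-bit mask classes exist only when AVX512BW is available.

  #include <cstdio>

  // Hypothetical helper: map a GPR width to the same-sized mask class,
  // the way the pass picks its destination register class.
  const char *maskClassFor(unsigned Bits) {
    switch (Bits) {
    case 8:  return "VK8";
    case 16: return "VK16";
    case 32: return "VK32"; // requires AVX512BW
    case 64: return "VK64"; // requires AVX512BW
    default: return "none";
    }
  }

  // VK8/VK16 are legal with plain AVX512F; VK32/VK64 also need AVX512BW.
  bool maskClassIsLegal(unsigned Bits, bool HasBWI) {
    return Bits <= 16 || HasBWI;
  }

  int main() {
    bool HasBWI = false; // e.g. KNL: AVX512F but no AVX512BW
    std::printf("GR32 -> %s, legal on this target: %s\n", maskClassFor(32),
                maskClassIsLegal(32, HasBWI) ? "yes" : "no");
    return 0;
  }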
Reviewers: RKSimon, spatel
Reviewed By: RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D56460
Modified:
llvm/trunk/lib/Target/X86/X86DomainReassignment.cpp
llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/vec_floor.ll
Modified: llvm/trunk/lib/Target/X86/X86DomainReassignment.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86DomainReassignment.cpp?rev=350800&r1=350799&r2=350800&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86DomainReassignment.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86DomainReassignment.cpp Wed Jan 9 23:43:54 2019
@@ -732,7 +732,10 @@ bool X86DomainReassignment::runOnMachine
STI = &MF.getSubtarget<X86Subtarget>();
// GPR->K is the only transformation currently supported, bail out early if no
// AVX512.
- if (!STI->hasAVX512())
+ // TODO: We're also bailing if AVX512BW isn't supported since we use VK32 and
+ // VK64 for GR32/GR64, but those aren't legal classes on KNL. If the register
+ // coalescer doesn't clean it up and we generate a spill we will crash.
+ if (!STI->hasAVX512() || !STI->hasBWI())
return false;
MRI = &MF.getRegInfo();
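A note on the test updates below: most are the same regression. With the pass disabled on non-BWI targets, a 16-bit mask argument is now loaded into a GPR (movzwl) and then copied into a mask register (kmovw %eax, %k1) instead of being folded into a single kmovw load from memory; avx512-mask-op.ll shows extra mask<->GPR round trips for the same reason. As a rough standalone C++ sketch of the gate added above (SubtargetLike is a hypothetical stand-in for X86Subtarget; in the real backend hasBWI() should imply hasAVX512(), so the first check is technically redundant but kept for clarity):

  // Hypothetical stand-in, not the LLVM API.
  struct SubtargetLike {
    bool AVX512 = false;
    bool BWI = false;
    bool hasAVX512() const { return AVX512; }
    bool hasBWI() const { return BWI; }
  };

  // Mirrors the early-exit above: GPR->K reassignment needs AVX512 at all,
  // and AVX512BW so that VK32/VK64 (used for GR32/GR64) are legal classes.
  bool shouldRunDomainReassignment(const SubtargetLike &STI) {
    return STI.hasAVX512() && STI.hasBWI();
  }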
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll?rev=350800&r1=350799&r2=350800&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll Wed Jan 9 23:43:54 2019
@@ -169,7 +169,8 @@ entry:
define <16 x float> @test_mm512_mask_shuffle_f32x4(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; X86-LABEL: test_mm512_mask_shuffle_f32x4:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3,4,5,6,7],zmm2[0,1,2,3,0,1,2,3]
; X86-NEXT: retl
;
@@ -188,7 +189,8 @@ entry:
define <16 x float> @test_mm512_maskz_shuffle_f32x4(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; X86-LABEL: test_mm512_maskz_shuffle_f32x4:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,0,1,2,3]
; X86-NEXT: retl
;
@@ -267,7 +269,8 @@ entry:
define <8 x i64> @test_mm512_mask_shuffle_i32x4(<8 x i64> %__W, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; X86-LABEL: test_mm512_mask_shuffle_i32x4:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3,4,5,6,7],zmm2[0,1,2,3,0,1,2,3]
; X86-NEXT: retl
;
@@ -289,7 +292,8 @@ entry:
define <8 x i64> @test_mm512_maskz_shuffle_i32x4(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; X86-LABEL: test_mm512_maskz_shuffle_i32x4:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,0,1,2,3]
; X86-NEXT: retl
;
@@ -377,7 +381,8 @@ entry:
define zeroext i16 @test_mm512_mask_testn_epi32_mask(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_testn_epi32_mask:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vptestnmd %zmm0, %zmm1, %k0 {%k1}
; X86-NEXT: kmovw %k0, %eax
; X86-NEXT: movzwl %ax, %eax
@@ -448,7 +453,8 @@ entry:
define zeroext i16 @test_mm512_mask_test_epi32_mask(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_test_epi32_mask:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vptestmd %zmm0, %zmm1, %k0 {%k1}
; X86-NEXT: kmovw %k0, %eax
; X86-NEXT: movzwl %ax, %eax
@@ -505,7 +511,8 @@ define <8 x i64> @test_mm512_mask_set1_e
; X86-LABEL: test_mm512_mask_set1_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vpbroadcastd %eax, %zmm0 {%k1}
; X86-NEXT: retl
;
@@ -528,7 +535,8 @@ define <8 x i64> @test_mm512_maskz_set1_
; X86-LABEL: test_mm512_maskz_set1_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
@@ -607,7 +615,8 @@ define <8 x i64> @test_mm512_broadcastd_
define <8 x i64> @test_mm512_mask_broadcastd_epi32(<8 x i64> %a0, i16 %a1, <2 x i64> %a2) {
; X86-LABEL: test_mm512_mask_broadcastd_epi32:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpbroadcastd %xmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
@@ -628,7 +637,8 @@ define <8 x i64> @test_mm512_mask_broadc
define <8 x i64> @test_mm512_maskz_broadcastd_epi32(i16 %a0, <2 x i64> %a1) {
; X86-LABEL: test_mm512_maskz_broadcastd_epi32:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpbroadcastd %xmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
@@ -751,7 +761,8 @@ define <16 x float> @test_mm512_broadcas
define <16 x float> @test_mm512_mask_broadcastss_ps(<16 x float> %a0, i16 %a1, <4 x float> %a2) {
; X86-LABEL: test_mm512_mask_broadcastss_ps:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vbroadcastss %xmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
@@ -769,7 +780,8 @@ define <16 x float> @test_mm512_mask_bro
define <16 x float> @test_mm512_maskz_broadcastss_ps(i16 %a0, <4 x float> %a1) {
; X86-LABEL: test_mm512_maskz_broadcastss_ps:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
@@ -843,7 +855,8 @@ define <16 x float> @test_mm512_movehdup
define <16 x float> @test_mm512_mask_movehdup_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
; X86-LABEL: test_mm512_mask_movehdup_ps:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X86-NEXT: retl
;
@@ -861,7 +874,8 @@ define <16 x float> @test_mm512_mask_mov
define <16 x float> @test_mm512_maskz_movehdup_ps(i16 %a0, <16 x float> %a1) {
; X86-LABEL: test_mm512_maskz_movehdup_ps:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X86-NEXT: retl
;
@@ -888,7 +902,8 @@ define <16 x float> @test_mm512_moveldup
define <16 x float> @test_mm512_mask_moveldup_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
; X86-LABEL: test_mm512_mask_moveldup_ps:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X86-NEXT: retl
;
@@ -906,7 +921,8 @@ define <16 x float> @test_mm512_mask_mov
define <16 x float> @test_mm512_maskz_moveldup_ps(i16 %a0, <16 x float> %a1) {
; X86-LABEL: test_mm512_maskz_moveldup_ps:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X86-NEXT: retl
;
@@ -980,7 +996,8 @@ define <16 x float> @test_mm512_permute_
define <16 x float> @test_mm512_mask_permute_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
; X86-LABEL: test_mm512_mask_permute_ps:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm1[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
; X86-NEXT: retl
;
@@ -998,7 +1015,8 @@ define <16 x float> @test_mm512_mask_per
define <16 x float> @test_mm512_maskz_permute_ps(i16 %a0, <16 x float> %a1) {
; X86-LABEL: test_mm512_maskz_permute_ps:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
; X86-NEXT: retl
;
@@ -1121,7 +1139,8 @@ define <8 x i64> @test_mm512_shuffle_epi
define <8 x i64> @test_mm512_mask_shuffle_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2) {
; X86-LABEL: test_mm512_mask_shuffle_epi32:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = zmm1[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; X86-NEXT: retl
;
@@ -1142,7 +1161,8 @@ define <8 x i64> @test_mm512_mask_shuffl
define <8 x i64> @test_mm512_maskz_shuffle_epi32(i16 %a0, <8 x i64> %a1) {
; X86-LABEL: test_mm512_maskz_shuffle_epi32:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; X86-NEXT: retl
;
@@ -1221,7 +1241,8 @@ define <8 x i64> @test_mm512_unpackhi_ep
define <8 x i64> @test_mm512_mask_unpackhi_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2, <8 x i64> %a3) {
; X86-LABEL: test_mm512_mask_unpackhi_epi32:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpunpckhdq {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
; X86-NEXT: retl
;
@@ -1243,7 +1264,8 @@ define <8 x i64> @test_mm512_mask_unpack
define <8 x i64> @test_mm512_maskz_unpackhi_epi32(i16 %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X86-LABEL: test_mm512_maskz_unpackhi_epi32:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpunpckhdq {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X86-NEXT: retl
;
@@ -1367,7 +1389,8 @@ define <16 x float> @test_mm512_unpackhi
define <16 x float> @test_mm512_mask_unpackhi_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2, <16 x float> %a3) {
; X86-LABEL: test_mm512_mask_unpackhi_ps:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
; X86-NEXT: retl
;
@@ -1385,7 +1408,8 @@ define <16 x float> @test_mm512_mask_unp
define <16 x float> @test_mm512_maskz_unpackhi_ps(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
; X86-LABEL: test_mm512_maskz_unpackhi_ps:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X86-NEXT: retl
;
@@ -1415,7 +1439,8 @@ define <8 x i64> @test_mm512_unpacklo_ep
define <8 x i64> @test_mm512_mask_unpacklo_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2, <8 x i64> %a3) {
; X86-LABEL: test_mm512_mask_unpacklo_epi32:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
; X86-NEXT: retl
;
@@ -1437,7 +1462,8 @@ define <8 x i64> @test_mm512_mask_unpack
define <8 x i64> @test_mm512_maskz_unpacklo_epi32(i16 %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X86-LABEL: test_mm512_maskz_unpacklo_epi32:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X86-NEXT: retl
;
@@ -1561,7 +1587,8 @@ define <16 x float> @test_mm512_unpacklo
define <16 x float> @test_mm512_mask_unpacklo_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2, <16 x float> %a3) {
; X86-LABEL: test_mm512_mask_unpacklo_ps:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
; X86-NEXT: retl
;
@@ -1579,7 +1606,8 @@ define <16 x float> @test_mm512_mask_unp
define <16 x float> @test_mm512_maskz_unpacklo_ps(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
; X86-LABEL: test_mm512_maskz_unpacklo_ps:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X86-NEXT: retl
;
@@ -1978,7 +2006,8 @@ entry:
define <2 x i64> @test_mm512_mask_cvtepi32_epi8(<2 x i64> %__O, i16 zeroext %__M, <8 x i64> %__A) {
; X86-LABEL: test_mm512_mask_cvtepi32_epi8:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmovdb %zmm1, %xmm0 {%k1}
; X86-NEXT: vzeroupper
; X86-NEXT: retl
@@ -2000,7 +2029,8 @@ entry:
define <2 x i64> @test_mm512_maskz_cvtepi32_epi8(i16 zeroext %__M, <8 x i64> %__A) {
; X86-LABEL: test_mm512_maskz_cvtepi32_epi8:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpmovdb %zmm0, %xmm0 {%k1} {z}
; X86-NEXT: vzeroupper
; X86-NEXT: retl
@@ -2149,7 +2179,8 @@ declare <16 x i32> @llvm.x86.avx512.pter
define <8 x i64> @test_mm512_mask_ternarylogic_epi32(<8 x i64> %__A, i16 zeroext %__U, <8 x i64> %__B, <8 x i64> %__C) {
; X86-LABEL: test_mm512_mask_ternarylogic_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpternlogd $4, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
@@ -2172,7 +2203,8 @@ entry:
define <8 x i64> @test_mm512_maskz_ternarylogic_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C) {
; X86-LABEL: test_mm512_maskz_ternarylogic_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpternlogd $4, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
@@ -2249,7 +2281,8 @@ declare <16 x i32> @llvm.x86.avx512.vper
define <8 x i64> @test_mm512_mask2_permutex2var_epi32(<8 x i64> %__A, <8 x i64> %__I, i16 zeroext %__U, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask2_permutex2var_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermi2d %zmm2, %zmm0, %zmm1 {%k1}
; X86-NEXT: vmovdqa64 %zmm1, %zmm0
; X86-NEXT: retl
@@ -2301,7 +2334,8 @@ declare <16 x float> @llvm.x86.avx512.vp
define <16 x float> @test_mm512_mask2_permutex2var_ps(<16 x float> %__A, <8 x i64> %__I, i16 zeroext %__U, <16 x float> %__B) {
; X86-LABEL: test_mm512_mask2_permutex2var_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1 {%k1}
; X86-NEXT: vmovaps %zmm1, %zmm0
; X86-NEXT: retl
@@ -2362,7 +2396,8 @@ entry:
define <8 x i64> @test_mm512_maskz_permutex2var_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__I, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_permutex2var_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2d %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
@@ -2385,7 +2420,8 @@ entry:
define <8 x i64> @test_mm512_mask_permutex2var_epi32(<8 x i64> %__A, i16 zeroext %__U, <8 x i64> %__I, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_permutex2var_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2d %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
@@ -2469,7 +2505,8 @@ entry:
define <16 x float> @test_mm512_mask_permutex2var_ps(<16 x float> %__A, i16 zeroext %__U, <8 x i64> %__I, <16 x float> %__B) {
; X86-LABEL: test_mm512_mask_permutex2var_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2ps %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
@@ -2489,7 +2526,8 @@ entry:
define <16 x float> @test_mm512_maskz_permutex2var_ps(i16 zeroext %__U, <16 x float> %__A, <8 x i64> %__I, <16 x float> %__B) {
; X86-LABEL: test_mm512_maskz_permutex2var_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpermt2ps %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
@@ -3427,7 +3465,8 @@ declare <16 x float> @llvm.x86.avx512.vf
define <16 x float> @test_mm512_mask_fmadd_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmadd_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; X86-NEXT: retl
;
@@ -3446,7 +3485,8 @@ entry:
define <16 x float> @test_mm512_mask3_fmadd_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmadd_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
@@ -3467,7 +3507,8 @@ entry:
define <16 x float> @test_mm512_maskz_fmadd_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmadd_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
@@ -3504,7 +3545,8 @@ entry:
define <16 x float> @test_mm512_mask_fmsub_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmsub_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; X86-NEXT: retl
;
@@ -3524,7 +3566,8 @@ entry:
define <16 x float> @test_mm512_maskz_fmsub_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmsub_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
@@ -3562,7 +3605,8 @@ entry:
define <16 x float> @test_mm512_mask3_fnmadd_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmadd_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
@@ -3584,7 +3628,8 @@ entry:
define <16 x float> @test_mm512_maskz_fnmadd_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fnmadd_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
@@ -3619,7 +3664,8 @@ entry:
define <16 x float> @test_mm512_maskz_fnmsub_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fnmsub_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
@@ -3650,7 +3696,8 @@ entry:
define <16 x float> @test_mm512_mask_fmadd_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm2
; X86-NEXT: retl
;
@@ -3669,7 +3716,8 @@ entry:
define <16 x float> @test_mm512_mask3_fmadd_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd231ps {{.*#+}} zmm2 = (zmm0 * zmm1) + zmm2
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
@@ -3690,7 +3738,8 @@ entry:
define <16 x float> @test_mm512_maskz_fmadd_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; X86-NEXT: retl
;
@@ -3727,7 +3776,8 @@ entry:
define <16 x float> @test_mm512_mask_fmsub_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub132ps {{.*#+}} zmm0 = (zmm0 * zmm1) - zmm2
; X86-NEXT: retl
;
@@ -3747,7 +3797,8 @@ entry:
define <16 x float> @test_mm512_maskz_fmsub_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
; X86-NEXT: retl
;
@@ -3785,7 +3836,8 @@ entry:
define <16 x float> @test_mm512_mask3_fnmadd_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd231ps {{.*#+}} zmm2 = -(zmm0 * zmm1) + zmm2
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
@@ -3807,7 +3859,8 @@ entry:
define <16 x float> @test_mm512_maskz_fnmadd_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fnmadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
; X86-NEXT: retl
;
@@ -3842,7 +3895,8 @@ entry:
define <16 x float> @test_mm512_maskz_fnmsub_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fnmsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; X86-NEXT: retl
;
@@ -4152,7 +4206,8 @@ declare <16 x float> @llvm.x86.avx512.vf
define <16 x float> @test_mm512_mask_fmaddsub_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmaddsub_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; X86-NEXT: retl
;
@@ -4171,7 +4226,8 @@ entry:
define <16 x float> @test_mm512_mask3_fmaddsub_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmaddsub_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
@@ -4192,7 +4248,8 @@ entry:
define <16 x float> @test_mm512_maskz_fmaddsub_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmaddsub_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
@@ -4229,7 +4286,8 @@ entry:
define <16 x float> @test_mm512_mask_fmsubadd_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmsubadd_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; X86-NEXT: retl
;
@@ -4249,7 +4307,8 @@ entry:
define <16 x float> @test_mm512_maskz_fmsubadd_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmsubadd_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
@@ -4282,7 +4341,8 @@ entry:
define <16 x float> @test_mm512_mask_fmaddsub_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmaddsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub132ps {{.*#+}} zmm0 = (zmm0 * zmm1) +/- zmm2
; X86-NEXT: retl
;
@@ -4304,7 +4364,8 @@ entry:
define <16 x float> @test_mm512_mask3_fmaddsub_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmaddsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub231ps {{.*#+}} zmm2 = (zmm0 * zmm1) +/- zmm2
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
@@ -4328,7 +4389,8 @@ entry:
define <16 x float> @test_mm512_maskz_fmaddsub_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmaddsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmaddsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2
; X86-NEXT: retl
;
@@ -4363,7 +4425,8 @@ entry:
define <16 x float> @test_mm512_mask_fmsubadd_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmsubadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) -/+ zmm2
; X86-NEXT: retl
;
@@ -4385,7 +4448,8 @@ entry:
define <16 x float> @test_mm512_maskz_fmsubadd_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmsubadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) -/+ zmm2
; X86-NEXT: retl
;
@@ -4453,7 +4517,8 @@ entry:
define <16 x float> @test_mm512_mask3_fmsub_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsub_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
@@ -4475,7 +4540,8 @@ entry:
define <16 x float> @test_mm512_mask3_fmsub_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsub231ps {{.*#+}} zmm2 = (zmm0 * zmm1) - zmm2
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
@@ -4545,7 +4611,8 @@ entry:
define <16 x float> @test_mm512_mask3_fmsubadd_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsubadd_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
@@ -4567,7 +4634,8 @@ entry:
define <16 x float> @test_mm512_mask3_fmsubadd_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsubadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfmsubadd231ps {{.*#+}} zmm2 = (zmm0 * zmm1) -/+ zmm2
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
@@ -4633,7 +4701,8 @@ entry:
define <16 x float> @test_mm512_mask_fnmadd_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fnmadd_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; X86-NEXT: retl
;
@@ -4653,7 +4722,8 @@ entry:
define <16 x float> @test_mm512_mask_fnmadd_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fnmadd_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmadd132ps {{.*#+}} zmm0 = -(zmm0 * zmm1) + zmm2
; X86-NEXT: retl
;
@@ -4765,7 +4835,8 @@ entry:
define <16 x float> @test_mm512_mask_fnmsub_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fnmsub_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1}
; X86-NEXT: retl
;
@@ -4786,7 +4857,8 @@ entry:
define <16 x float> @test_mm512_mask3_fnmsub_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmsub_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
@@ -4809,7 +4881,8 @@ entry:
define <16 x float> @test_mm512_mask_fnmsub_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fnmsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub132ps {{.*#+}} zmm0 = -(zmm0 * zmm1) - zmm2
; X86-NEXT: retl
;
@@ -4830,7 +4903,8 @@ entry:
define <16 x float> @test_mm512_mask3_fnmsub_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmsub_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vfnmsub231ps {{.*#+}} zmm2 = -(zmm0 * zmm1) - zmm2
; X86-NEXT: vmovaps %zmm2, %zmm0
; X86-NEXT: retl
@@ -6252,7 +6326,8 @@ define <8 x i64> @test_mm512_mask_expand
; X86-LABEL: test_mm512_mask_expandloadu_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vpexpandd (%eax), %zmm0 {%k1}
; X86-NEXT: retl
;
@@ -6274,7 +6349,8 @@ define <8 x i64> @test_mm512_maskz_expan
; X86-LABEL: test_mm512_maskz_expandloadu_epi32:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vpexpandd (%eax), %zmm0 {%k1} {z}
; X86-NEXT: retl
;
@@ -6295,7 +6371,8 @@ define <16 x float> @test_mm512_mask_exp
; X86-LABEL: test_mm512_mask_expandloadu_ps:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vexpandps (%eax), %zmm0 {%k1}
; X86-NEXT: retl
;
@@ -6315,7 +6392,8 @@ define <16 x float> @test_mm512_maskz_ex
; X86-LABEL: test_mm512_maskz_expandloadu_ps:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: kmovw %ecx, %k1
; X86-NEXT: vexpandps (%eax), %zmm0 {%k1} {z}
; X86-NEXT: retl
;
@@ -6380,9 +6458,10 @@ entry:
define void @test_mm512_mask_compressstoreu_ps(i8* %__P, i16 zeroext %__U, <16 x float> %__A) {
; X86-LABEL: test_mm512_mask_compressstoreu_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: vcompressps %zmm0, (%eax) {%k1}
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: kmovw %eax, %k1
+; X86-NEXT: vcompressps %zmm0, (%ecx) {%k1}
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
@@ -6402,9 +6481,10 @@ entry:
define void @test_mm512_mask_compressstoreu_epi32(i8* %__P, i16 zeroext %__U, <8 x i64> %__A) {
; X86-LABEL: test_mm512_mask_compressstoreu_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: vpcompressd %zmm0, (%eax) {%k1}
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: kmovw %eax, %k1
+; X86-NEXT: vpcompressd %zmm0, (%ecx) {%k1}
; X86-NEXT: vzeroupper
; X86-NEXT: retl
;
@@ -6973,7 +7053,8 @@ entry:
define i32 @test_mm512_mask_reduce_add_epi32(i16 zeroext %__M, <8 x i64> %__W) {
; X86-LABEL: test_mm512_mask_reduce_add_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; X86-NEXT: vpaddd %ymm1, %ymm0, %ymm0
@@ -7029,7 +7110,8 @@ entry:
define i32 @test_mm512_mask_reduce_mul_epi32(i16 zeroext %__M, <8 x i64> %__W) {
; X86-LABEL: test_mm512_mask_reduce_mul_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpbroadcastd {{.*#+}} zmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; X86-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; X86-NEXT: vextracti64x4 $1, %zmm1, %ymm0
@@ -7087,7 +7169,8 @@ entry:
define i32 @test_mm512_mask_reduce_and_epi32(i16 zeroext %__M, <8 x i64> %__W) {
; X86-LABEL: test_mm512_mask_reduce_and_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; X86-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; X86-NEXT: vextracti64x4 $1, %zmm1, %ymm0
@@ -7141,7 +7224,8 @@ entry:
define i32 @test_mm512_mask_reduce_or_epi32(i16 zeroext %__M, <8 x i64> %__W) {
; X86-LABEL: test_mm512_mask_reduce_or_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; X86-NEXT: vpor %ymm1, %ymm0, %ymm0
@@ -7499,7 +7583,8 @@ define float @test_mm512_mask_reduce_add
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; X86-NEXT: vaddps %ymm1, %ymm0, %ymm0
@@ -7555,7 +7640,8 @@ define float @test_mm512_mask_reduce_mul
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vbroadcastss {{.*#+}} zmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; X86-NEXT: vmovaps %zmm0, %zmm1 {%k1}
; X86-NEXT: vextractf64x4 $1, %zmm1, %ymm0
@@ -8415,7 +8501,8 @@ entry:
define i32 @test_mm512_mask_reduce_max_epi32(i16 zeroext %__M, <8 x i64> %__W) {
; X86-LABEL: test_mm512_mask_reduce_max_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpbroadcastd {{.*#+}} zmm1 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
; X86-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; X86-NEXT: vextracti64x4 $1, %zmm1, %ymm0
@@ -8477,7 +8564,8 @@ entry:
define i32 @test_mm512_mask_reduce_max_epu32(i16 zeroext %__M, <8 x i64> %__W) {
; X86-LABEL: test_mm512_mask_reduce_max_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; X86-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
@@ -8539,7 +8627,8 @@ define float @test_mm512_mask_reduce_max
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vbroadcastss {{.*#+}} zmm1 = [-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf]
; X86-NEXT: vmovaps %zmm0, %zmm1 {%k1}
; X86-NEXT: vextractf64x4 $1, %zmm1, %ymm0
@@ -8595,7 +8684,8 @@ entry:
define i32 @test_mm512_mask_reduce_min_epi32(i16 zeroext %__M, <8 x i64> %__W) {
; X86-LABEL: test_mm512_mask_reduce_min_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpbroadcastd {{.*#+}} zmm1 = [2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647,2147483647]
; X86-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; X86-NEXT: vextracti64x4 $1, %zmm1, %ymm0
@@ -8657,7 +8747,8 @@ entry:
define i32 @test_mm512_mask_reduce_min_epu32(i16 zeroext %__M, <8 x i64> %__W) {
; X86-LABEL: test_mm512_mask_reduce_min_epu32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; X86-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; X86-NEXT: vextracti64x4 $1, %zmm1, %ymm0
@@ -8721,7 +8812,8 @@ define float @test_mm512_mask_reduce_min
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vbroadcastss {{.*#+}} zmm1 = [+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf]
; X86-NEXT: vmovaps %zmm0, %zmm1 {%k1}
; X86-NEXT: vextractf64x4 $1, %zmm1, %ymm0
@@ -8817,7 +8909,8 @@ entry:
define <16 x float> @test_mm512_mask_max_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; X86-LABEL: test_mm512_mask_max_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
@@ -8888,7 +8981,8 @@ entry:
define <16 x float> @test_mm512_maskz_max_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; X86-LABEL: test_mm512_maskz_max_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
@@ -8907,7 +9001,8 @@ entry:
define <16 x float> @test_mm512_mask_max_round_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; X86-LABEL: test_mm512_mask_max_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
@@ -8928,7 +9023,8 @@ declare <16 x float> @llvm.x86.avx512.ma
define <16 x float> @test_mm512_maskz_max_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; X86-LABEL: test_mm512_maskz_max_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
@@ -9049,7 +9145,8 @@ entry:
define <16 x float> @test_mm512_mask_min_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; X86-LABEL: test_mm512_mask_min_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
@@ -9068,7 +9165,8 @@ entry:
define <16 x float> @test_mm512_maskz_min_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; X86-LABEL: test_mm512_maskz_min_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
@@ -9087,7 +9185,8 @@ entry:
define <16 x float> @test_mm512_mask_min_round_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; X86-LABEL: test_mm512_mask_min_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
@@ -9108,7 +9207,8 @@ declare <16 x float> @llvm.x86.avx512.mi
define <16 x float> @test_mm512_maskz_min_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; X86-LABEL: test_mm512_maskz_min_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
@@ -9249,7 +9349,8 @@ entry:
define <16 x float> @test_mm512_mask_sqrt_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A) {
; X86-LABEL: test_mm512_mask_sqrt_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vsqrtps %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
@@ -9268,7 +9369,8 @@ entry:
define <16 x float> @test_mm512_maskz_sqrt_ps(i16 zeroext %__U, <16 x float> %__A) {
; X86-LABEL: test_mm512_maskz_sqrt_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vsqrtps %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
@@ -9287,7 +9389,8 @@ entry:
define <16 x float> @test_mm512_mask_sqrt_round_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A) {
; X86-LABEL: test_mm512_mask_sqrt_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vsqrtps {rn-sae}, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
@@ -9308,7 +9411,8 @@ declare <16 x float> @llvm.x86.avx512.sq
define <16 x float> @test_mm512_maskz_sqrt_round_ps(i16 zeroext %__U, <16 x float> %__A) {
; X86-LABEL: test_mm512_maskz_sqrt_round_ps:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vsqrtps {rn-sae}, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
@@ -9349,7 +9453,8 @@ entry:
define <8 x i64> @test_mm512_mask_rol_epi32(<8 x i64> %__W, i16 zeroext %__U, <8 x i64> %__A) {
; X86-LABEL: test_mm512_mask_rol_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprold $5, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
@@ -9371,7 +9476,8 @@ entry:
define <8 x i64> @test_mm512_maskz_rol_epi32(i16 zeroext %__U, <8 x i64> %__A) {
; X86-LABEL: test_mm512_maskz_rol_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprold $5, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
@@ -9455,7 +9561,8 @@ entry:
define <8 x i64> @test_mm512_mask_rolv_epi32(<8 x i64> %__W, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_rolv_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprolvd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
@@ -9478,7 +9585,8 @@ entry:
define <8 x i64> @test_mm512_maskz_rolv_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_rolv_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprolvd %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
@@ -9563,7 +9671,8 @@ entry:
define <8 x i64> @test_mm512_mask_ror_epi32(<8 x i64> %__W, i16 zeroext %__U, <8 x i64> %__A) {
; X86-LABEL: test_mm512_mask_ror_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprord $5, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
@@ -9585,7 +9694,8 @@ entry:
define <8 x i64> @test_mm512_maskz_ror_epi32(i16 zeroext %__U, <8 x i64> %__A) {
; X86-LABEL: test_mm512_maskz_ror_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprord $5, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
@@ -9669,7 +9779,8 @@ entry:
define <8 x i64> @test_mm512_mask_rorv_epi32(<8 x i64> %__W, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_rorv_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprorvd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT: retl
;
@@ -9692,7 +9803,8 @@ entry:
define <8 x i64> @test_mm512_maskz_rorv_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_rorv_epi32:
; X86: # %bb.0: # %entry
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: kmovw %eax, %k1
; X86-NEXT: vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=350800&r1=350799&r2=350800&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Wed Jan 9 23:43:54 2019
@@ -1278,19 +1278,23 @@ define <64 x i8> @test17(i64 %x, i32 %y,
define <8 x i1> @test18(i8 %a, i16 %y) {
; KNL-LABEL: test18:
; KNL: ## %bb.0:
-; KNL-NEXT: kmovw %edi, %k1
-; KNL-NEXT: kmovw %esi, %k2
-; KNL-NEXT: kshiftrw $8, %k2, %k0
-; KNL-NEXT: kshiftrw $9, %k2, %k2
-; KNL-NEXT: kshiftrw $6, %k1, %k3
-; KNL-NEXT: kxorw %k2, %k3, %k2
-; KNL-NEXT: kshiftlw $15, %k2, %k2
-; KNL-NEXT: kshiftrw $9, %k2, %k2
+; KNL-NEXT: kmovw %edi, %k0
+; KNL-NEXT: kmovw %esi, %k1
+; KNL-NEXT: kshiftrw $8, %k1, %k2
+; KNL-NEXT: kmovw %k2, %eax
+; KNL-NEXT: kshiftrw $9, %k1, %k1
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: kshiftrw $6, %k0, %k1
+; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kxorw %k2, %k1, %k1
-; KNL-NEXT: kshiftlw $9, %k1, %k1
+; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: kshiftrw $9, %k1, %k1
-; KNL-NEXT: kshiftlw $7, %k0, %k0
-; KNL-NEXT: korw %k0, %k1, %k1
+; KNL-NEXT: kxorw %k1, %k0, %k0
+; KNL-NEXT: kshiftlw $9, %k0, %k0
+; KNL-NEXT: kshiftrw $9, %k0, %k0
+; KNL-NEXT: kmovw %eax, %k1
+; KNL-NEXT: kshiftlw $7, %k1, %k1
+; KNL-NEXT: korw %k1, %k0, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
@@ -1337,19 +1341,23 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
;
; AVX512DQ-LABEL: test18:
; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: kmovw %edi, %k1
-; AVX512DQ-NEXT: kmovw %esi, %k2
-; AVX512DQ-NEXT: kshiftrw $8, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $9, %k2, %k2
-; AVX512DQ-NEXT: kshiftrb $6, %k1, %k3
-; AVX512DQ-NEXT: kxorb %k2, %k3, %k2
-; AVX512DQ-NEXT: kshiftlb $7, %k2, %k2
-; AVX512DQ-NEXT: kshiftrb $1, %k2, %k2
+; AVX512DQ-NEXT: kmovw %edi, %k0
+; AVX512DQ-NEXT: kmovw %esi, %k1
+; AVX512DQ-NEXT: kshiftrw $8, %k1, %k2
+; AVX512DQ-NEXT: kmovw %k2, %eax
+; AVX512DQ-NEXT: kshiftrw $9, %k1, %k1
+; AVX512DQ-NEXT: kmovw %k1, %ecx
+; AVX512DQ-NEXT: kshiftrb $6, %k0, %k1
+; AVX512DQ-NEXT: kmovw %ecx, %k2
; AVX512DQ-NEXT: kxorb %k2, %k1, %k1
-; AVX512DQ-NEXT: kshiftlb $1, %k1, %k1
+; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1
; AVX512DQ-NEXT: kshiftrb $1, %k1, %k1
-; AVX512DQ-NEXT: kshiftlb $7, %k0, %k0
-; AVX512DQ-NEXT: korb %k0, %k1, %k0
+; AVX512DQ-NEXT: kxorb %k1, %k0, %k0
+; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0
+; AVX512DQ-NEXT: kshiftrb $1, %k0, %k0
+; AVX512DQ-NEXT: kmovw %eax, %k1
+; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1
+; AVX512DQ-NEXT: korb %k1, %k0, %k0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll?rev=350800&r1=350799&r2=350800&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll Wed Jan 9 23:43:54 2019
@@ -1617,7 +1617,8 @@ declare <16 x i8> @llvm.x86.avx512.mask.
define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmov_qb_128:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovqb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x32,0xc2]
; X86-NEXT: vpmovqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x32,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
@@ -1670,7 +1671,8 @@ declare <16 x i8> @llvm.x86.avx512.mask.
define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovs_qb_128:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsqb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x22,0xc2]
; X86-NEXT: vpmovsqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x22,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
@@ -1723,7 +1725,8 @@ declare <16 x i8> @llvm.x86.avx512.mask.
define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovus_qb_128:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovusqb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x12,0xc2]
; X86-NEXT: vpmovusqb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x12,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
@@ -1776,7 +1779,8 @@ declare <16 x i8> @llvm.x86.avx512.mask.
define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmov_qb_256:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovqb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x32,0xc2]
; X86-NEXT: vpmovqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x32,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
@@ -1833,7 +1837,8 @@ declare <16 x i8> @llvm.x86.avx512.mask.
define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovs_qb_256:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsqb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x22,0xc2]
; X86-NEXT: vpmovsqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x22,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
@@ -1890,7 +1895,8 @@ declare <16 x i8> @llvm.x86.avx512.mask.
define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_256(<4 x i64> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovus_qb_256:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovusqb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x12,0xc2]
; X86-NEXT: vpmovusqb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x12,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
@@ -2619,7 +2625,8 @@ declare <16 x i8> @llvm.x86.avx512.mask.
define <16 x i8>@test_int_x86_avx512_mask_pmov_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmov_db_128:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovdb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x31,0xc2]
; X86-NEXT: vpmovdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x31,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
@@ -2672,7 +2679,8 @@ declare <16 x i8> @llvm.x86.avx512.mask.
define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovs_db_128:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsdb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x21,0xc2]
; X86-NEXT: vpmovsdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x21,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
@@ -2725,7 +2733,8 @@ declare <16 x i8> @llvm.x86.avx512.mask.
define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_128(<4 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovus_db_128:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovusdb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x11,0xc2]
; X86-NEXT: vpmovusdb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x11,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
@@ -2778,7 +2787,8 @@ declare <16 x i8> @llvm.x86.avx512.mask.
define <16 x i8>@test_int_x86_avx512_mask_pmov_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmov_db_256:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovdb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x31,0xc2]
; X86-NEXT: vpmovdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x31,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
@@ -2835,7 +2845,8 @@ declare <16 x i8> @llvm.x86.avx512.mask.
define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovs_db_256:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsdb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x21,0xc2]
; X86-NEXT: vpmovsdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x21,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
@@ -2892,7 +2903,8 @@ declare <16 x i8> @llvm.x86.avx512.mask.
define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_256(<8 x i32> %x0, <16 x i8> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovus_db_256:
; X86: # %bb.0:
-; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
+; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovusdb %ymm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x11,0xc2]
; X86-NEXT: vpmovusdb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x11,0xc1]
; X86-NEXT: vpaddb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xfc,0xca]
Modified: llvm/trunk/test/CodeGen/X86/vec_floor.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_floor.ll?rev=350800&r1=350799&r2=350800&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_floor.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_floor.ll Wed Jan 9 23:43:54 2019
@@ -1694,7 +1694,8 @@ define <4 x float> @floor_mask_ss_mask8(
; AVX512F: ## %bb.0:
; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vcmpeqps %zmm1, %zmm0, %k1
+; AVX512F-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; AVX512F-NEXT: kmovw %k0, %k1
; AVX512F-NEXT: vrndscaless $1, %xmm0, %xmm1, %xmm2 {%k1}
; AVX512F-NEXT: vmovaps %xmm2, %xmm0
; AVX512F-NEXT: vzeroupper
@@ -1702,7 +1703,8 @@ define <4 x float> @floor_mask_ss_mask8(
;
; AVX512VL-LABEL: floor_mask_ss_mask8:
; AVX512VL: ## %bb.0:
-; AVX512VL-NEXT: vcmpeqps %xmm1, %xmm0, %k1
+; AVX512VL-NEXT: vcmpeqps %xmm1, %xmm0, %k0
+; AVX512VL-NEXT: kmovw %k0, %k1
; AVX512VL-NEXT: vrndscaless $1, %xmm0, %xmm1, %xmm2 {%k1}
; AVX512VL-NEXT: vmovaps %xmm2, %xmm0
; AVX512VL-NEXT: retq
@@ -1753,14 +1755,16 @@ define <4 x float> @floor_maskz_ss_mask8
; AVX512F: ## %bb.0:
; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vcmpeqps %zmm1, %zmm0, %k1
+; AVX512F-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; AVX512F-NEXT: kmovw %k0, %k1
; AVX512F-NEXT: vrndscaless $1, %xmm0, %xmm1, %xmm0 {%k1} {z}
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: floor_maskz_ss_mask8:
; AVX512VL: ## %bb.0:
-; AVX512VL-NEXT: vcmpeqps %xmm1, %xmm0, %k1
+; AVX512VL-NEXT: vcmpeqps %xmm1, %xmm0, %k0
+; AVX512VL-NEXT: kmovw %k0, %k1
; AVX512VL-NEXT: vrndscaless $1, %xmm0, %xmm1, %xmm0 {%k1} {z}
; AVX512VL-NEXT: retq
%mask1 = fcmp oeq <4 x float> %x, %y
@@ -1804,7 +1808,8 @@ define <2 x double> @floor_mask_sd_mask8
; AVX512F: ## %bb.0:
; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
+; AVX512F-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
+; AVX512F-NEXT: kmovw %k0, %k1
; AVX512F-NEXT: vrndscalesd $1, %xmm0, %xmm1, %xmm2 {%k1}
; AVX512F-NEXT: vmovapd %xmm2, %xmm0
; AVX512F-NEXT: vzeroupper
@@ -1812,7 +1817,8 @@ define <2 x double> @floor_mask_sd_mask8
;
; AVX512VL-LABEL: floor_mask_sd_mask8:
; AVX512VL: ## %bb.0:
-; AVX512VL-NEXT: vcmpeqpd %xmm1, %xmm0, %k1
+; AVX512VL-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT: kmovw %k0, %k1
; AVX512VL-NEXT: vrndscalesd $1, %xmm0, %xmm1, %xmm2 {%k1}
; AVX512VL-NEXT: vmovapd %xmm2, %xmm0
; AVX512VL-NEXT: retq
@@ -1863,14 +1869,16 @@ define <2 x double> @floor_maskz_sd_mask
; AVX512F: ## %bb.0:
; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
+; AVX512F-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
+; AVX512F-NEXT: kmovw %k0, %k1
; AVX512F-NEXT: vrndscalesd $1, %xmm0, %xmm1, %xmm0 {%k1} {z}
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: floor_maskz_sd_mask8:
; AVX512VL: ## %bb.0:
-; AVX512VL-NEXT: vcmpeqpd %xmm1, %xmm0, %k1
+; AVX512VL-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT: kmovw %k0, %k1
; AVX512VL-NEXT: vrndscalesd $1, %xmm0, %xmm1, %xmm0 {%k1} {z}
; AVX512VL-NEXT: retq
%mask1 = fcmp oeq <2 x double> %x, %y
@@ -2758,7 +2766,8 @@ define <4 x float> @ceil_mask_ss_mask8(<
; AVX512F: ## %bb.0:
; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vcmpeqps %zmm1, %zmm0, %k1
+; AVX512F-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; AVX512F-NEXT: kmovw %k0, %k1
; AVX512F-NEXT: vrndscaless $2, %xmm0, %xmm1, %xmm2 {%k1}
; AVX512F-NEXT: vmovaps %xmm2, %xmm0
; AVX512F-NEXT: vzeroupper
@@ -2766,7 +2775,8 @@ define <4 x float> @ceil_mask_ss_mask8(<
;
; AVX512VL-LABEL: ceil_mask_ss_mask8:
; AVX512VL: ## %bb.0:
-; AVX512VL-NEXT: vcmpeqps %xmm1, %xmm0, %k1
+; AVX512VL-NEXT: vcmpeqps %xmm1, %xmm0, %k0
+; AVX512VL-NEXT: kmovw %k0, %k1
; AVX512VL-NEXT: vrndscaless $2, %xmm0, %xmm1, %xmm2 {%k1}
; AVX512VL-NEXT: vmovaps %xmm2, %xmm0
; AVX512VL-NEXT: retq
@@ -2817,14 +2827,16 @@ define <4 x float> @ceil_maskz_ss_mask8(
; AVX512F: ## %bb.0:
; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vcmpeqps %zmm1, %zmm0, %k1
+; AVX512F-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; AVX512F-NEXT: kmovw %k0, %k1
; AVX512F-NEXT: vrndscaless $2, %xmm0, %xmm1, %xmm0 {%k1} {z}
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: ceil_maskz_ss_mask8:
; AVX512VL: ## %bb.0:
-; AVX512VL-NEXT: vcmpeqps %xmm1, %xmm0, %k1
+; AVX512VL-NEXT: vcmpeqps %xmm1, %xmm0, %k0
+; AVX512VL-NEXT: kmovw %k0, %k1
; AVX512VL-NEXT: vrndscaless $2, %xmm0, %xmm1, %xmm0 {%k1} {z}
; AVX512VL-NEXT: retq
%mask1 = fcmp oeq <4 x float> %x, %y
@@ -2868,7 +2880,8 @@ define <2 x double> @ceil_mask_sd_mask8(
; AVX512F: ## %bb.0:
; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
+; AVX512F-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
+; AVX512F-NEXT: kmovw %k0, %k1
; AVX512F-NEXT: vrndscalesd $2, %xmm0, %xmm1, %xmm2 {%k1}
; AVX512F-NEXT: vmovapd %xmm2, %xmm0
; AVX512F-NEXT: vzeroupper
@@ -2876,7 +2889,8 @@ define <2 x double> @ceil_mask_sd_mask8(
;
; AVX512VL-LABEL: ceil_mask_sd_mask8:
; AVX512VL: ## %bb.0:
-; AVX512VL-NEXT: vcmpeqpd %xmm1, %xmm0, %k1
+; AVX512VL-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT: kmovw %k0, %k1
; AVX512VL-NEXT: vrndscalesd $2, %xmm0, %xmm1, %xmm2 {%k1}
; AVX512VL-NEXT: vmovapd %xmm2, %xmm0
; AVX512VL-NEXT: retq
@@ -2927,14 +2941,16 @@ define <2 x double> @ceil_maskz_sd_mask8
; AVX512F: ## %bb.0:
; AVX512F-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
+; AVX512F-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
+; AVX512F-NEXT: kmovw %k0, %k1
; AVX512F-NEXT: vrndscalesd $2, %xmm0, %xmm1, %xmm0 {%k1} {z}
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: ceil_maskz_sd_mask8:
; AVX512VL: ## %bb.0:
-; AVX512VL-NEXT: vcmpeqpd %xmm1, %xmm0, %k1
+; AVX512VL-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT: kmovw %k0, %k1
; AVX512VL-NEXT: vrndscalesd $2, %xmm0, %xmm1, %xmm0 {%k1} {z}
; AVX512VL-NEXT: retq
%mask1 = fcmp oeq <2 x double> %x, %y