[llvm] 06ba969 - [X86] Make (insert_vector_elt (v8i16 zerovec), i16 %x, 0) generate the same code as (v8i16 (build_vector %x, 0, 0, 0, 0, 0, 0, 0)).

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sun Feb 9 22:08:22 PST 2020


Author: Craig Topper
Date: 2020-02-09T21:52:11-08:00
New Revision: 06ba969c9dc98ba4d40d6e6c13822e2752aaffc5

URL: https://github.com/llvm/llvm-project/commit/06ba969c9dc98ba4d40d6e6c13822e2752aaffc5
DIFF: https://github.com/llvm/llvm-project/commit/06ba969c9dc98ba4d40d6e6c13822e2752aaffc5.diff

LOG: [X86] Make (insert_vector_elt (v8i16 zerovec), i16 %x, 0) generate the same code as (v8i16 (build_vector %x, 0, 0, 0, 0, 0, 0, 0)).

Instead of using an insrw to element 0, use movzx and movd.

Same for v16i8.
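
For illustration, a small IR reproducer of the pattern this change targets. The
function name and the exact assembly sketched in the comments are illustrative
only (they are not taken from the updated tests, and actual output depends on
the subtarget and register allocation):

    define <8 x i16> @insert_i16_into_zero(i16 %x) {
      ; Insert a scalar i16 into element 0 of an all-zeros vector.
      %v = insertelement <8 x i16> zeroinitializer, i16 %x, i32 0
      ret <8 x i16> %v
    }

    ; Previously this lowered to roughly:
    ;   pxor    %xmm0, %xmm0
    ;   pinsrw  $0, %edi, %xmm0
    ; With this change it lowers the same way as the equivalent build_vector,
    ; roughly:
    ;   movzwl  %di, %eax
    ;   movd    %eax, %xmm0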

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
    llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
    llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
    llvm/test/CodeGen/X86/broadcastm-lowering.ll
    llvm/test/CodeGen/X86/buildvec-insertvec.ll
    llvm/test/CodeGen/X86/merge-consecutive-loads-256.ll
    llvm/test/CodeGen/X86/vec_cast.ll
    llvm/test/CodeGen/X86/vector-mulfix-legalize.ll
    llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
    llvm/test/CodeGen/X86/vector-shuffle-variable-128.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ad1055e8ed85..279c91be276c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18006,11 +18006,22 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
   assert(VT.is128BitVector() && "Only 128-bit vector types should be left!");
 
   // This will be just movd/movq/movss/movsd.
-  if (IdxVal == 0 && ISD::isBuildVectorAllZeros(N0.getNode()) &&
-      (EltVT == MVT::i32 || EltVT == MVT::f32 || EltVT == MVT::f64 ||
-       EltVT == MVT::i64)) {
-    N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
-    return getShuffleVectorZeroOrUndef(N1, 0, true, Subtarget, DAG);
+  if (IdxVal == 0 && ISD::isBuildVectorAllZeros(N0.getNode())) {
+    if (EltVT == MVT::i32 || EltVT == MVT::f32 || EltVT == MVT::f64 ||
+        EltVT == MVT::i64) {
+      N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
+      return getShuffleVectorZeroOrUndef(N1, 0, true, Subtarget, DAG);
+    }
+
+    // We can't directly insert an i8 or i16 into a vector, so zero extend
+    // it to i32 first.
+    if (EltVT == MVT::i16 || EltVT == MVT::i8) {
+      N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, N1);
+      MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32);
+      N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShufVT, N1);
+      N1 = getShuffleVectorZeroOrUndef(N1, 0, true, Subtarget, DAG);
+      return DAG.getBitcast(VT, N1);
+    }
   }
 
   // Transform it so it match pinsr{b,w} which expects a GR32 as its second

diff  --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
index e7f132bcdc67..a4ab1b08b119 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
@@ -5229,10 +5229,9 @@ define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
 ; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xd9,0x04]
 ; CHECK-NEXT:    vpcmpnltd %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xe1,0x05]
 ; CHECK-NEXT:    vpcmpgtd %zmm1, %zmm0, %k5 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xe9]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
-; CHECK-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
 ; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT:    vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
 ; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
 ; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
@@ -5277,10 +5276,9 @@ define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask)
 ; X86-NEXT:    vpcmpneqd %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe1,0x04]
 ; X86-NEXT:    vpcmpnltd %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe9,0x05]
 ; X86-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x66,0xc9]
-; X86-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
-; X86-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
 ; X86-NEXT:    kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
+; X86-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
+; X86-NEXT:    vmovd %edx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
 ; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
 ; X86-NEXT:    kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
 ; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
@@ -5303,10 +5301,9 @@ define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask)
 ; X64-NEXT:    vpcmpneqd %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe1,0x04]
 ; X64-NEXT:    vpcmpnltd %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe9,0x05]
 ; X64-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x66,0xc9]
-; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
-; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
 ; X64-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; X64-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
+; X64-NEXT:    vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
 ; X64-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
 ; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
@@ -5349,10 +5346,9 @@ define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
 ; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xd9,0x04]
 ; CHECK-NEXT:    vpcmpnltud %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xe1,0x05]
 ; CHECK-NEXT:    vpcmpnleud %zmm1, %zmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xe9,0x06]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
-; CHECK-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
 ; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT:    vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
 ; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
 ; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
@@ -5397,10 +5393,9 @@ define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask
 ; X86-NEXT:    vpcmpneqd %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe1,0x04]
 ; X86-NEXT:    vpcmpnltud %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xe9,0x05]
 ; X86-NEXT:    vpcmpnleud %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xc9,0x06]
-; X86-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
-; X86-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
 ; X86-NEXT:    kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
+; X86-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
+; X86-NEXT:    vmovd %edx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
 ; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
 ; X86-NEXT:    kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
 ; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
@@ -5423,10 +5418,9 @@ define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask
 ; X64-NEXT:    vpcmpneqd %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe1,0x04]
 ; X64-NEXT:    vpcmpnltud %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xe9,0x05]
 ; X64-NEXT:    vpcmpnleud %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xc9,0x06]
-; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
-; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
 ; X64-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; X64-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
+; X64-NEXT:    vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
 ; X64-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
 ; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
@@ -5464,23 +5458,23 @@ define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
 ; CHECK-LABEL: test_cmp_q_512:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1]
-; CHECK-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xc8]
-; CHECK-NEXT:    vpcmpleq %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xd1,0x02]
-; CHECK-NEXT:    vpcmpneqq %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xd9,0x04]
-; CHECK-NEXT:    vpcmpnltq %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xe1,0x05]
-; CHECK-NEXT:    vpcmpgtq %zmm1, %zmm0, %k5 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xe9]
 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
-; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; CHECK-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xc0]
+; CHECK-NEXT:    vpcmpleq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x02]
+; CHECK-NEXT:    vpcmpneqq %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xd1,0x04]
+; CHECK-NEXT:    vpcmpnltq %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xd9,0x05]
+; CHECK-NEXT:    vpcmpgtq %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xe1]
+; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
+; CHECK-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
 ; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
-; CHECK-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
-; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
 ; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
 ; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
 ; CHECK-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
-; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
 ; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
 ; CHECK-NEXT:    movl $255, %eax ## encoding: [0xb8,0xff,0x00,0x00,0x00]
 ; CHECK-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
@@ -5511,21 +5505,21 @@ define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
-; X86-NEXT:    vpcmpgtq %zmm0, %zmm1, %k2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x37,0xd0]
-; X86-NEXT:    vpcmpleq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd9,0x02]
-; X86-NEXT:    vpcmpneqq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe1,0x04]
-; X86-NEXT:    vpcmpnltq %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe9,0x05]
-; X86-NEXT:    vpcmpgtq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0xc9]
 ; X86-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
-; X86-NEXT:    vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; X86-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x37,0xc0]
+; X86-NEXT:    vpcmpleq %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd1,0x02]
+; X86-NEXT:    vpcmpneqq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd9,0x04]
+; X86-NEXT:    vpcmpnltq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe1,0x05]
+; X86-NEXT:    vpcmpgtq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0xc9]
+; X86-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
+; X86-NEXT:    movzbl %cl, %ecx ## encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT:    vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
+; X86-NEXT:    vpinsrb $1, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01]
 ; X86-NEXT:    kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
-; X86-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
-; X86-NEXT:    kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
 ; X86-NEXT:    vpinsrb $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02]
-; X86-NEXT:    kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
+; X86-NEXT:    kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
 ; X86-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04]
-; X86-NEXT:    kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
+; X86-NEXT:    kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
 ; X86-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05]
 ; X86-NEXT:    kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
 ; X86-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
@@ -5537,21 +5531,21 @@ define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
 ; X64:       ## %bb.0:
 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; X64-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
-; X64-NEXT:    vpcmpgtq %zmm0, %zmm1, %k2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x37,0xd0]
-; X64-NEXT:    vpcmpleq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd9,0x02]
-; X64-NEXT:    vpcmpneqq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe1,0x04]
-; X64-NEXT:    vpcmpnltq %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe9,0x05]
-; X64-NEXT:    vpcmpgtq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0xc9]
 ; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
-; X64-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; X64-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x37,0xc0]
+; X64-NEXT:    vpcmpleq %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd1,0x02]
+; X64-NEXT:    vpcmpneqq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd9,0x04]
+; X64-NEXT:    vpcmpnltq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe1,0x05]
+; X64-NEXT:    vpcmpgtq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0xc9]
+; X64-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
+; X64-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; X64-NEXT:    vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
+; X64-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
 ; X64-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
-; X64-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
-; X64-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
 ; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; X64-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; X64-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
 ; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; X64-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; X64-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
 ; X64-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
 ; X64-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
 ; X64-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
@@ -5583,23 +5577,23 @@ define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
 ; CHECK-LABEL: test_ucmp_q_512:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1]
-; CHECK-NEXT:    vpcmpltuq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc9,0x01]
-; CHECK-NEXT:    vpcmpleuq %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xd1,0x02]
-; CHECK-NEXT:    vpcmpneqq %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xd9,0x04]
-; CHECK-NEXT:    vpcmpnltuq %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xe1,0x05]
-; CHECK-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xe9,0x06]
 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
-; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; CHECK-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc1,0x01]
+; CHECK-NEXT:    vpcmpleuq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc9,0x02]
+; CHECK-NEXT:    vpcmpneqq %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xd1,0x04]
+; CHECK-NEXT:    vpcmpnltuq %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xd9,0x05]
+; CHECK-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xe1,0x06]
+; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
+; CHECK-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
 ; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
-; CHECK-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
-; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
 ; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
 ; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
 ; CHECK-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
-; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
 ; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
 ; CHECK-NEXT:    movl $255, %eax ## encoding: [0xb8,0xff,0x00,0x00,0x00]
 ; CHECK-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
@@ -5630,21 +5624,21 @@ define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
-; X86-NEXT:    vpcmpltuq %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xd1,0x01]
-; X86-NEXT:    vpcmpleuq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xd9,0x02]
-; X86-NEXT:    vpcmpneqq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe1,0x04]
-; X86-NEXT:    vpcmpnltuq %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xe9,0x05]
-; X86-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc9,0x06]
 ; X86-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
-; X86-NEXT:    vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; X86-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc1,0x01]
+; X86-NEXT:    vpcmpleuq %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xd1,0x02]
+; X86-NEXT:    vpcmpneqq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd9,0x04]
+; X86-NEXT:    vpcmpnltuq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xe1,0x05]
+; X86-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc9,0x06]
+; X86-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
+; X86-NEXT:    movzbl %cl, %ecx ## encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT:    vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
+; X86-NEXT:    vpinsrb $1, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01]
 ; X86-NEXT:    kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
-; X86-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
-; X86-NEXT:    kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
 ; X86-NEXT:    vpinsrb $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02]
-; X86-NEXT:    kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
+; X86-NEXT:    kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
 ; X86-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04]
-; X86-NEXT:    kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
+; X86-NEXT:    kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
 ; X86-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05]
 ; X86-NEXT:    kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
 ; X86-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
@@ -5656,21 +5650,21 @@ define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
 ; X64:       ## %bb.0:
 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; X64-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
-; X64-NEXT:    vpcmpltuq %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xd1,0x01]
-; X64-NEXT:    vpcmpleuq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xd9,0x02]
-; X64-NEXT:    vpcmpneqq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe1,0x04]
-; X64-NEXT:    vpcmpnltuq %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xe9,0x05]
-; X64-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc9,0x06]
 ; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
-; X64-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; X64-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc1,0x01]
+; X64-NEXT:    vpcmpleuq %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xd1,0x02]
+; X64-NEXT:    vpcmpneqq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd9,0x04]
+; X64-NEXT:    vpcmpnltuq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xe1,0x05]
+; X64-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc9,0x06]
+; X64-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
+; X64-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; X64-NEXT:    vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
+; X64-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
 ; X64-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
-; X64-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
-; X64-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
 ; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; X64-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; X64-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
 ; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; X64-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; X64-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
 ; X64-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
 ; X64-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
 ; X64-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]

diff  --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
index 8907578cd33b..16bd8fd65cb7 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
@@ -4653,10 +4653,9 @@ define <8 x i16> @test_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
 ; CHECK-NEXT:    vpcmpneqw %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd9,0x04]
 ; CHECK-NEXT:    vpcmpnltw %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xe1,0x05]
 ; CHECK-NEXT:    vpcmpgtw %ymm1, %ymm0, %k5 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xe9]
-; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; CHECK-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
 ; CHECK-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
+; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
 ; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
 ; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
@@ -4701,10 +4700,9 @@ define <8 x i16> @test_mask_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask)
 ; X86-NEXT:    vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
 ; X86-NEXT:    vpcmpnltw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe9,0x05]
 ; X86-NEXT:    vpcmpgtw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc9]
-; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X86-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
 ; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
+; X86-NEXT:    kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
+; X86-NEXT:    vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
 ; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
 ; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
 ; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
@@ -4727,10 +4725,9 @@ define <8 x i16> @test_mask_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask)
 ; X64-NEXT:    vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
 ; X64-NEXT:    vpcmpnltw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe9,0x05]
 ; X64-NEXT:    vpcmpgtw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc9]
-; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
 ; X64-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
+; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
 ; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
 ; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
@@ -4773,10 +4770,9 @@ define <8 x i16> @test_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
 ; CHECK-NEXT:    vpcmpneqw %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd9,0x04]
 ; CHECK-NEXT:    vpcmpnltuw %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe1,0x05]
 ; CHECK-NEXT:    vpcmpnleuw %ymm1, %ymm0, %k5 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe9,0x06]
-; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; CHECK-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
 ; CHECK-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
+; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
 ; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
 ; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
@@ -4821,10 +4817,9 @@ define <8 x i16> @test_mask_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask
 ; X86-NEXT:    vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
 ; X86-NEXT:    vpcmpnltuw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xe9,0x05]
 ; X86-NEXT:    vpcmpnleuw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc9,0x06]
-; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X86-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
 ; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
+; X86-NEXT:    kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
+; X86-NEXT:    vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
 ; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
 ; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
 ; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
@@ -4847,10 +4842,9 @@ define <8 x i16> @test_mask_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask
 ; X64-NEXT:    vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
 ; X64-NEXT:    vpcmpnltuw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xe9,0x05]
 ; X64-NEXT:    vpcmpnleuw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc9,0x06]
-; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
 ; X64-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
+; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
 ; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
 ; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
@@ -4893,10 +4887,9 @@ define <8 x i16> @test_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1) {
 ; CHECK-NEXT:    vpcmpneqb %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd9,0x04]
 ; CHECK-NEXT:    vpcmpnltb %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xe1,0x05]
 ; CHECK-NEXT:    vpcmpgtb %xmm1, %xmm0, %k5 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xe9]
-; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; CHECK-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
 ; CHECK-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
+; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
 ; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
 ; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
@@ -4940,10 +4933,9 @@ define <8 x i16> @test_mask_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
 ; X86-NEXT:    vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
 ; X86-NEXT:    vpcmpnltb %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe9,0x05]
 ; X86-NEXT:    vpcmpgtb %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x64,0xc9]
-; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X86-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
 ; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
+; X86-NEXT:    kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
+; X86-NEXT:    vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
 ; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
 ; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
 ; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
@@ -4965,10 +4957,9 @@ define <8 x i16> @test_mask_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
 ; X64-NEXT:    vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
 ; X64-NEXT:    vpcmpnltb %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe9,0x05]
 ; X64-NEXT:    vpcmpgtb %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x64,0xc9]
-; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
 ; X64-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
+; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
 ; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
 ; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
@@ -5010,10 +5001,9 @@ define <8 x i16> @test_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1) {
 ; CHECK-NEXT:    vpcmpneqb %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd9,0x04]
 ; CHECK-NEXT:    vpcmpnltub %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe1,0x05]
 ; CHECK-NEXT:    vpcmpnleub %xmm1, %xmm0, %k5 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe9,0x06]
-; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; CHECK-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
 ; CHECK-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
+; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
 ; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
 ; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
@@ -5057,10 +5047,9 @@ define <8 x i16> @test_mask_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask)
 ; X86-NEXT:    vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
 ; X86-NEXT:    vpcmpnltub %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xe9,0x05]
 ; X86-NEXT:    vpcmpnleub %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc9,0x06]
-; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X86-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
 ; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
+; X86-NEXT:    kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
+; X86-NEXT:    vmovd %edx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
 ; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
 ; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
 ; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
@@ -5082,10 +5071,9 @@ define <8 x i16> @test_mask_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask)
 ; X64-NEXT:    vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
 ; X64-NEXT:    vpcmpnltub %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xe9,0x05]
 ; X64-NEXT:    vpcmpnleub %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc9,0x06]
-; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
 ; X64-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
+; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
 ; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
 ; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
@@ -5122,23 +5110,23 @@ define <8 x i8> @test_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
 ; CHECK-LABEL: test_cmp_w_128:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
-; CHECK-NEXT:    vpcmpgtw %xmm0, %xmm1, %k1 # encoding: [0x62,0xf1,0x75,0x08,0x65,0xc8]
-; CHECK-NEXT:    vpcmplew %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd1,0x02]
-; CHECK-NEXT:    vpcmpneqw %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd9,0x04]
-; CHECK-NEXT:    vpcmpnltw %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xe1,0x05]
-; CHECK-NEXT:    vpcmpgtw %xmm1, %xmm0, %k5 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xe9]
 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; CHECK-NEXT:    vpcmpgtw %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x65,0xc0]
+; CHECK-NEXT:    vpcmplew %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc9,0x02]
+; CHECK-NEXT:    vpcmpneqw %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd1,0x04]
+; CHECK-NEXT:    vpcmpnltw %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd9,0x05]
+; CHECK-NEXT:    vpcmpgtw %xmm1, %xmm0, %k4 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xe1]
+; CHECK-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
+; CHECK-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
+; CHECK-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
 ; CHECK-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
-; CHECK-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
-; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
 ; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
+; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
 ; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; CHECK-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
+; CHECK-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
 ; CHECK-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
-; CHECK-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
+; CHECK-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
 ; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
 ; CHECK-NEXT:    movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00]
 ; CHECK-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
@@ -5168,21 +5156,21 @@ define <8 x i8> @test_mask_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
 ; X86-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
-; X86-NEXT:    vpcmpgtw %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x65,0xd0]
-; X86-NEXT:    vpcmplew %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x02]
-; X86-NEXT:    vpcmpneqw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x04]
-; X86-NEXT:    vpcmpnltw %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe9,0x05]
-; X86-NEXT:    vpcmpgtw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x65,0xc9]
 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X86-NEXT:    vpinsrb $0, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; X86-NEXT:    vpcmpgtw %xmm0, %xmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x65,0xc0]
+; X86-NEXT:    vpcmplew %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd1,0x02]
+; X86-NEXT:    vpcmpneqw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x04]
+; X86-NEXT:    vpcmpnltw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x05]
+; X86-NEXT:    vpcmpgtw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x65,0xc9]
+; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
+; X86-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
+; X86-NEXT:    vpinsrb $1, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01]
 ; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
-; X86-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
-; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
 ; X86-NEXT:    vpinsrb $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02]
-; X86-NEXT:    kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
+; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
 ; X86-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04]
-; X86-NEXT:    kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd]
+; X86-NEXT:    kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
 ; X86-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05]
 ; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
 ; X86-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
@@ -5193,21 +5181,21 @@ define <8 x i8> @test_mask_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 ; X64-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
-; X64-NEXT:    vpcmpgtw %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x65,0xd0]
-; X64-NEXT:    vpcmplew %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x02]
-; X64-NEXT:    vpcmpneqw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x04]
-; X64-NEXT:    vpcmpnltw %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe9,0x05]
-; X64-NEXT:    vpcmpgtw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x65,0xc9]
 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X64-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; X64-NEXT:    vpcmpgtw %xmm0, %xmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x65,0xc0]
+; X64-NEXT:    vpcmplew %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd1,0x02]
+; X64-NEXT:    vpcmpneqw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x04]
+; X64-NEXT:    vpcmpnltw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x05]
+; X64-NEXT:    vpcmpgtw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x65,0xc9]
+; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
+; X64-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
+; X64-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
+; X64-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
 ; X64-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
-; X64-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
-; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
 ; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; X64-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
+; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
 ; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; X64-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
+; X64-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
 ; X64-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
 ; X64-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
 ; X64-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
@@ -5238,23 +5226,23 @@ define <8 x i8> @test_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
 ; CHECK-LABEL: test_ucmp_w_128:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
-; CHECK-NEXT:    vpcmpltuw %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc9,0x01]
-; CHECK-NEXT:    vpcmpleuw %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xd1,0x02]
-; CHECK-NEXT:    vpcmpneqw %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd9,0x04]
-; CHECK-NEXT:    vpcmpnltuw %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xe1,0x05]
-; CHECK-NEXT:    vpcmpnleuw %xmm1, %xmm0, %k5 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xe9,0x06]
 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; CHECK-NEXT:    vpcmpltuw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc1,0x01]
+; CHECK-NEXT:    vpcmpleuw %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc9,0x02]
+; CHECK-NEXT:    vpcmpneqw %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd1,0x04]
+; CHECK-NEXT:    vpcmpnltuw %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xd9,0x05]
+; CHECK-NEXT:    vpcmpnleuw %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xe1,0x06]
+; CHECK-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
+; CHECK-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
+; CHECK-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
 ; CHECK-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
-; CHECK-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
-; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
 ; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
+; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
 ; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; CHECK-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
+; CHECK-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
 ; CHECK-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
-; CHECK-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
+; CHECK-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
 ; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
 ; CHECK-NEXT:    movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00]
 ; CHECK-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
@@ -5284,21 +5272,21 @@ define <8 x i8> @test_mask_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
 ; X86-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
-; X86-NEXT:    vpcmpltuw %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd1,0x01]
-; X86-NEXT:    vpcmpleuw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd9,0x02]
-; X86-NEXT:    vpcmpneqw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x04]
-; X86-NEXT:    vpcmpnltuw %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe9,0x05]
-; X86-NEXT:    vpcmpnleuw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc9,0x06]
 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
-; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X86-NEXT:    vpinsrb $0, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; X86-NEXT:    vpcmpltuw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc1,0x01]
+; X86-NEXT:    vpcmpleuw %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd1,0x02]
+; X86-NEXT:    vpcmpneqw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x04]
+; X86-NEXT:    vpcmpnltuw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe1,0x05]
+; X86-NEXT:    vpcmpnleuw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc9,0x06]
+; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
+; X86-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
+; X86-NEXT:    vpinsrb $1, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01]
 ; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
-; X86-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
-; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
 ; X86-NEXT:    vpinsrb $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02]
-; X86-NEXT:    kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
+; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
 ; X86-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04]
-; X86-NEXT:    kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd]
+; X86-NEXT:    kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
 ; X86-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05]
 ; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
 ; X86-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
@@ -5309,21 +5297,21 @@ define <8 x i8> @test_mask_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
 ; X64-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
-; X64-NEXT:    vpcmpltuw %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd1,0x01]
-; X64-NEXT:    vpcmpleuw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd9,0x02]
-; X64-NEXT:    vpcmpneqw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x04]
-; X64-NEXT:    vpcmpnltuw %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe9,0x05]
-; X64-NEXT:    vpcmpnleuw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc9,0x06]
 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X64-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; X64-NEXT:    vpcmpltuw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc1,0x01]
+; X64-NEXT:    vpcmpleuw %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd1,0x02]
+; X64-NEXT:    vpcmpneqw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x04]
+; X64-NEXT:    vpcmpnltuw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe1,0x05]
+; X64-NEXT:    vpcmpnleuw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc9,0x06]
+; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
+; X64-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
+; X64-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
+; X64-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
 ; X64-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
-; X64-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
-; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
 ; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; X64-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
+; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
 ; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; X64-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
+; X64-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
 ; X64-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
 ; X64-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
 ; X64-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]

diff  --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
index 28c4111747c9..d339e21c37a2 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
@@ -8055,23 +8055,23 @@ define <8 x i8> @test_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
 ; CHECK-LABEL: test_cmp_d_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
-; CHECK-NEXT:    vpcmpgtd %ymm0, %ymm1, %k1 # encoding: [0x62,0xf1,0x75,0x28,0x66,0xc8]
-; CHECK-NEXT:    vpcmpled %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd1,0x02]
-; CHECK-NEXT:    vpcmpneqd %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd9,0x04]
-; CHECK-NEXT:    vpcmpnltd %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xe1,0x05]
-; CHECK-NEXT:    vpcmpgtd %ymm1, %ymm0, %k5 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xe9]
 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; CHECK-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0 # encoding: [0x62,0xf1,0x75,0x28,0x66,0xc0]
+; CHECK-NEXT:    vpcmpled %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x02]
+; CHECK-NEXT:    vpcmpneqd %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd1,0x04]
+; CHECK-NEXT:    vpcmpnltd %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd9,0x05]
+; CHECK-NEXT:    vpcmpgtd %ymm1, %ymm0, %k4 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xe1]
+; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
+; CHECK-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
 ; CHECK-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
-; CHECK-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
-; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
 ; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
 ; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; CHECK-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
 ; CHECK-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
-; CHECK-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
 ; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
 ; CHECK-NEXT:    movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00]
 ; CHECK-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
@@ -8102,21 +8102,21 @@ define <8 x i8> @test_mask_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
-; X86-NEXT:    vpcmpgtd %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x66,0xd0]
-; X86-NEXT:    vpcmpled %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x02]
-; X86-NEXT:    vpcmpneqd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x04]
-; X86-NEXT:    vpcmpnltd %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe9,0x05]
-; X86-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc9]
 ; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
-; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X86-NEXT:    vpinsrb $0, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; X86-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x66,0xc0]
+; X86-NEXT:    vpcmpled %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd1,0x02]
+; X86-NEXT:    vpcmpneqd %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x04]
+; X86-NEXT:    vpcmpnltd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x05]
+; X86-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc9]
+; X86-NEXT:    kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
+; X86-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
+; X86-NEXT:    vpinsrb $1, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01]
 ; X86-NEXT:    kmovw %k2, %ecx # encoding: [0xc5,0xf8,0x93,0xca]
-; X86-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
-; X86-NEXT:    kmovw %k3, %ecx # encoding: [0xc5,0xf8,0x93,0xcb]
 ; X86-NEXT:    vpinsrb $2, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02]
-; X86-NEXT:    kmovw %k4, %ecx # encoding: [0xc5,0xf8,0x93,0xcc]
+; X86-NEXT:    kmovw %k3, %ecx # encoding: [0xc5,0xf8,0x93,0xcb]
 ; X86-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04]
-; X86-NEXT:    kmovw %k5, %ecx # encoding: [0xc5,0xf8,0x93,0xcd]
+; X86-NEXT:    kmovw %k4, %ecx # encoding: [0xc5,0xf8,0x93,0xcc]
 ; X86-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05]
 ; X86-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
 ; X86-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
@@ -8128,21 +8128,21 @@ define <8 x i8> @test_mask_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 ; X64-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
-; X64-NEXT:    vpcmpgtd %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x66,0xd0]
-; X64-NEXT:    vpcmpled %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x02]
-; X64-NEXT:    vpcmpneqd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x04]
-; X64-NEXT:    vpcmpnltd %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe9,0x05]
-; X64-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc9]
 ; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X64-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; X64-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x66,0xc0]
+; X64-NEXT:    vpcmpled %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd1,0x02]
+; X64-NEXT:    vpcmpneqd %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x04]
+; X64-NEXT:    vpcmpnltd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x05]
+; X64-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc9]
+; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; X64-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
+; X64-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
+; X64-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
 ; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
-; X64-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
-; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
 ; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; X64-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
+; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
 ; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; X64-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
+; X64-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
 ; X64-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
 ; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
 ; X64-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
@@ -8174,23 +8174,23 @@ define <8 x i8> @test_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
 ; CHECK-LABEL: test_ucmp_d_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
-; CHECK-NEXT:    vpcmpltud %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc9,0x01]
-; CHECK-NEXT:    vpcmpleud %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xd1,0x02]
-; CHECK-NEXT:    vpcmpneqd %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd9,0x04]
-; CHECK-NEXT:    vpcmpnltud %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xe1,0x05]
-; CHECK-NEXT:    vpcmpnleud %ymm1, %ymm0, %k5 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xe9,0x06]
 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; CHECK-NEXT:    vpcmpltud %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x01]
+; CHECK-NEXT:    vpcmpleud %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc9,0x02]
+; CHECK-NEXT:    vpcmpneqd %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd1,0x04]
+; CHECK-NEXT:    vpcmpnltud %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xd9,0x05]
+; CHECK-NEXT:    vpcmpnleud %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xe1,0x06]
+; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
+; CHECK-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
 ; CHECK-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
-; CHECK-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
-; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
 ; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
 ; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; CHECK-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
 ; CHECK-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
-; CHECK-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
 ; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
 ; CHECK-NEXT:    movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00]
 ; CHECK-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
@@ -8221,21 +8221,21 @@ define <8 x i8> @test_mask_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
 ; X86-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
-; X86-NEXT:    vpcmpltud %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd1,0x01]
-; X86-NEXT:    vpcmpleud %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd9,0x02]
-; X86-NEXT:    vpcmpneqd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x04]
-; X86-NEXT:    vpcmpnltud %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe9,0x05]
-; X86-NEXT:    vpcmpnleud %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc9,0x06]
 ; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
-; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X86-NEXT:    vpinsrb $0, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00]
+; X86-NEXT:    vpcmpltud %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc1,0x01]
+; X86-NEXT:    vpcmpleud %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd1,0x02]
+; X86-NEXT:    vpcmpneqd %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x04]
+; X86-NEXT:    vpcmpnltud %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe1,0x05]
+; X86-NEXT:    vpcmpnleud %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc9,0x06]
+; X86-NEXT:    kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
+; X86-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
+; X86-NEXT:    vpinsrb $1, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01]
 ; X86-NEXT:    kmovw %k2, %ecx # encoding: [0xc5,0xf8,0x93,0xca]
-; X86-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
-; X86-NEXT:    kmovw %k3, %ecx # encoding: [0xc5,0xf8,0x93,0xcb]
 ; X86-NEXT:    vpinsrb $2, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02]
-; X86-NEXT:    kmovw %k4, %ecx # encoding: [0xc5,0xf8,0x93,0xcc]
+; X86-NEXT:    kmovw %k3, %ecx # encoding: [0xc5,0xf8,0x93,0xcb]
 ; X86-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04]
-; X86-NEXT:    kmovw %k5, %ecx # encoding: [0xc5,0xf8,0x93,0xcd]
+; X86-NEXT:    kmovw %k4, %ecx # encoding: [0xc5,0xf8,0x93,0xcc]
 ; X86-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05]
 ; X86-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
 ; X86-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
@@ -8247,21 +8247,21 @@ define <8 x i8> @test_mask_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
 ; X64:       # %bb.0:
 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
 ; X64-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
-; X64-NEXT:    vpcmpltud %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd1,0x01]
-; X64-NEXT:    vpcmpleud %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd9,0x02]
-; X64-NEXT:    vpcmpneqd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x04]
-; X64-NEXT:    vpcmpnltud %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe9,0x05]
-; X64-NEXT:    vpcmpnleud %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc9,0x06]
 ; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X64-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; X64-NEXT:    vpcmpltud %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc1,0x01]
+; X64-NEXT:    vpcmpleud %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd1,0x02]
+; X64-NEXT:    vpcmpneqd %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x04]
+; X64-NEXT:    vpcmpnltud %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe1,0x05]
+; X64-NEXT:    vpcmpnleud %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc9,0x06]
+; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; X64-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
+; X64-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
+; X64-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
 ; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
-; X64-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
-; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
 ; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; X64-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
+; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
 ; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
-; X64-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
+; X64-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
 ; X64-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
 ; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
 ; X64-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
@@ -8298,10 +8298,9 @@ define <8 x i8> @test_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
 ; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd9,0x04]
 ; CHECK-NEXT:    vpcmpnltq %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xe1,0x05]
 ; CHECK-NEXT:    vpcmpgtq %ymm1, %ymm0, %k5 # encoding: [0x62,0xf2,0xfd,0x28,0x37,0xe9]
-; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
 ; CHECK-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; CHECK-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
 ; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -8347,10 +8346,9 @@ define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
 ; X86-NEXT:    vpcmpgtq %ymm1, %ymm0, %k6 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x37,0xf1]
 ; X86-NEXT:    kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
 ; X86-NEXT:    kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
-; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X86-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
 ; X86-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
+; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; X86-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; X86-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; X86-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
 ; X86-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -8376,10 +8374,9 @@ define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
 ; X64-NEXT:    vpcmpgtq %ymm1, %ymm0, %k6 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x37,0xf1]
 ; X64-NEXT:    kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
 ; X64-NEXT:    kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
-; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X64-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
 ; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
+; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; X64-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
 ; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -8423,10 +8420,9 @@ define <8 x i8> @test_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
 ; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd9,0x04]
 ; CHECK-NEXT:    vpcmpnltuq %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe1,0x05]
 ; CHECK-NEXT:    vpcmpnleuq %ymm1, %ymm0, %k5 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe9,0x06]
-; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
 ; CHECK-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; CHECK-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
 ; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -8472,10 +8468,9 @@ define <8 x i8> @test_mask_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
 ; X86-NEXT:    vpcmpnleuq %ymm1, %ymm0, %k6 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xf1,0x06]
 ; X86-NEXT:    kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
 ; X86-NEXT:    kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
-; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X86-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
 ; X86-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
+; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; X86-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; X86-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; X86-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
 ; X86-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -8501,10 +8496,9 @@ define <8 x i8> @test_mask_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
 ; X64-NEXT:    vpcmpnleuq %ymm1, %ymm0, %k6 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xf1,0x06]
 ; X64-NEXT:    kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
 ; X64-NEXT:    kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
-; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X64-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
 ; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
+; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; X64-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
 ; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -8548,10 +8542,9 @@ define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd9,0x04]
 ; CHECK-NEXT:    vpcmpnltd %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xe1,0x05]
 ; CHECK-NEXT:    vpcmpgtd %xmm1, %xmm0, %k5 # encoding: [0x62,0xf1,0x7d,0x08,0x66,0xe9]
-; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
 ; CHECK-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; CHECK-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
 ; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -8596,10 +8589,9 @@ define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
 ; X86-NEXT:    vpcmpgtd %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x66,0xf1]
 ; X86-NEXT:    kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
 ; X86-NEXT:    kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
-; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X86-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
 ; X86-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
+; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; X86-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; X86-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; X86-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
 ; X86-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -8624,10 +8616,9 @@ define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
 ; X64-NEXT:    vpcmpgtd %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x66,0xf1]
 ; X64-NEXT:    kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
 ; X64-NEXT:    kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
-; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X64-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
 ; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
+; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; X64-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
 ; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -8670,10 +8661,9 @@ define <8 x i8> @test_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd9,0x04]
 ; CHECK-NEXT:    vpcmpnltud %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe1,0x05]
 ; CHECK-NEXT:    vpcmpnleud %xmm1, %xmm0, %k5 # encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe9,0x06]
-; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
 ; CHECK-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; CHECK-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
 ; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -8718,10 +8708,9 @@ define <8 x i8> @test_mask_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
 ; X86-NEXT:    vpcmpnleud %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xf1,0x06]
 ; X86-NEXT:    kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
 ; X86-NEXT:    kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
-; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X86-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
 ; X86-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
+; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; X86-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; X86-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; X86-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
 ; X86-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -8746,10 +8735,9 @@ define <8 x i8> @test_mask_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
 ; X64-NEXT:    vpcmpnleud %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xf1,0x06]
 ; X64-NEXT:    kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
 ; X64-NEXT:    kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
-; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X64-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
 ; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
+; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; X64-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
 ; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -8792,10 +8780,9 @@ define <8 x i8> @test_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
 ; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd9,0x04]
 ; CHECK-NEXT:    vpcmpnltq %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xe1,0x05]
 ; CHECK-NEXT:    vpcmpgtq %xmm1, %xmm0, %k5 # encoding: [0x62,0xf2,0xfd,0x08,0x37,0xe9]
-; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
 ; CHECK-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; CHECK-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
 ; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -8840,10 +8827,9 @@ define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
 ; X86-NEXT:    vpcmpgtq %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x37,0xf1]
 ; X86-NEXT:    kshiftlw $14, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e]
 ; X86-NEXT:    kshiftrw $14, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e]
-; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X86-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
 ; X86-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
+; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; X86-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; X86-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; X86-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
 ; X86-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -8868,10 +8854,9 @@ define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
 ; X64-NEXT:    vpcmpgtq %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x37,0xf1]
 ; X64-NEXT:    kshiftlw $14, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e]
 ; X64-NEXT:    kshiftrw $14, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e]
-; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X64-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
 ; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
+; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; X64-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
 ; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -8914,10 +8899,9 @@ define <8 x i8> @test_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
 ; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd9,0x04]
 ; CHECK-NEXT:    vpcmpnltuq %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe1,0x05]
 ; CHECK-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k5 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe9,0x06]
-; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
 ; CHECK-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; CHECK-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
 ; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -8962,10 +8946,9 @@ define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
 ; X86-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xf1,0x06]
 ; X86-NEXT:    kshiftlw $14, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e]
 ; X86-NEXT:    kshiftrw $14, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e]
-; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X86-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
 ; X86-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
+; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; X86-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; X86-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; X86-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
 ; X86-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
@@ -8990,10 +8973,9 @@ define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
 ; X64-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xf1,0x06]
 ; X64-NEXT:    kshiftlw $14, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e]
 ; X64-NEXT:    kshiftrw $14, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e]
-; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
-; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
-; X64-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
 ; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
+; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
+; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; X64-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
 ; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
 ; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]

diff  --git a/llvm/test/CodeGen/X86/broadcastm-lowering.ll b/llvm/test/CodeGen/X86/broadcastm-lowering.ll
index 986d313cb8d0..f995c77f76ea 100644
--- a/llvm/test/CodeGen/X86/broadcastm-lowering.ll
+++ b/llvm/test/CodeGen/X86/broadcastm-lowering.ll
@@ -10,8 +10,8 @@ define <2 x i64> @test_mm_epi64(<8 x i16> %a, <8 x i16> %b) {
 ; AVX512CD-NEXT:    vpmovsxwq %xmm0, %zmm0
 ; AVX512CD-NEXT:    vptestmq %zmm0, %zmm0, %k0
 ; AVX512CD-NEXT:    kmovw %k0, %eax
-; AVX512CD-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-; AVX512CD-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0
+; AVX512CD-NEXT:    movzbl %al, %ecx
+; AVX512CD-NEXT:    vmovd %ecx, %xmm0
 ; AVX512CD-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
 ; AVX512CD-NEXT:    vzeroupper
 ; AVX512CD-NEXT:    retq
@@ -157,11 +157,11 @@ define <8 x i32> @test_mm256_epi32(<16 x i16> %a, <16 x i16> %b) {
 ; AVX512CD-NEXT:    vpmovsxwd %ymm0, %zmm0
 ; AVX512CD-NEXT:    vptestmd %zmm0, %zmm0, %k0
 ; AVX512CD-NEXT:    kmovw %k0, %eax
-; AVX512CD-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-; AVX512CD-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
-; AVX512CD-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0
-; AVX512CD-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0
-; AVX512CD-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0
+; AVX512CD-NEXT:    kmovw %k0, %ecx
+; AVX512CD-NEXT:    vmovd %eax, %xmm0
+; AVX512CD-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0
+; AVX512CD-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0
+; AVX512CD-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0
 ; AVX512CD-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; AVX512CD-NEXT:    retq
 ;

diff  --git a/llvm/test/CodeGen/X86/buildvec-insertvec.ll b/llvm/test/CodeGen/X86/buildvec-insertvec.ll
index 7867e4494d1a..95d5e940265b 100644
--- a/llvm/test/CodeGen/X86/buildvec-insertvec.ll
+++ b/llvm/test/CodeGen/X86/buildvec-insertvec.ll
@@ -304,8 +304,8 @@ define <8 x i16> @test_buildvector_v8i16_partial(i16 %a1, i16 %a3, i16 %a4, i16
 define <8 x i16> @test_buildvector_v8i16_register_zero(i16 %a0, i16 %a3, i16 %a4, i16 %a5) {
 ; CHECK-LABEL: test_buildvector_v8i16_register_zero:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pxor %xmm0, %xmm0
-; CHECK-NEXT:    pinsrw $0, %edi, %xmm0
+; CHECK-NEXT:    movzwl %di, %eax
+; CHECK-NEXT:    movd %eax, %xmm0
 ; CHECK-NEXT:    pinsrw $3, %esi, %xmm0
 ; CHECK-NEXT:    pinsrw $4, %edx, %xmm0
 ; CHECK-NEXT:    pinsrw $5, %ecx, %xmm0
@@ -480,8 +480,8 @@ define <16 x i8> @test_buildvector_v16i8_register_zero(i8 %a0, i8 %a4, i8 %a6, i
 ;
 ; SSE41-LABEL: test_buildvector_v16i8_register_zero:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    pxor %xmm0, %xmm0
-; SSE41-NEXT:    pinsrb $0, %edi, %xmm0
+; SSE41-NEXT:    movzbl %dil, %eax
+; SSE41-NEXT:    movd %eax, %xmm0
 ; SSE41-NEXT:    pinsrb $4, %esi, %xmm0
 ; SSE41-NEXT:    pinsrb $6, %edx, %xmm0
 ; SSE41-NEXT:    pinsrb $8, %ecx, %xmm0

diff  --git a/llvm/test/CodeGen/X86/merge-consecutive-loads-256.ll b/llvm/test/CodeGen/X86/merge-consecutive-loads-256.ll
index 252d2e08b14a..e957b654980d 100644
--- a/llvm/test/CodeGen/X86/merge-consecutive-loads-256.ll
+++ b/llvm/test/CodeGen/X86/merge-consecutive-loads-256.ll
@@ -604,33 +604,36 @@ define <4 x double> @merge_4f64_f64_34uz_volatile(double* %ptr) nounwind uwtable
 define <16 x i16> @merge_16i16_i16_0uu3zzuuuuuzCuEF_volatile(i16* %ptr) nounwind uwtable noinline ssp {
 ; AVX1-LABEL: merge_16i16_i16_0uu3zzuuuuuzCuEF_volatile:
 ; AVX1:       # %bb.0:
+; AVX1-NEXT:    movzwl (%rdi), %eax
 ; AVX1-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-; AVX1-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm1
 ; AVX1-NEXT:    vpinsrw $4, 24(%rdi), %xmm0, %xmm0
 ; AVX1-NEXT:    vpinsrw $6, 28(%rdi), %xmm0, %xmm0
 ; AVX1-NEXT:    vpinsrw $7, 30(%rdi), %xmm0, %xmm0
+; AVX1-NEXT:    vmovd %eax, %xmm1
 ; AVX1-NEXT:    vpinsrw $3, 6(%rdi), %xmm1, %xmm1
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: merge_16i16_i16_0uu3zzuuuuuzCuEF_volatile:
 ; AVX2:       # %bb.0:
+; AVX2-NEXT:    movzwl (%rdi), %eax
 ; AVX2-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-; AVX2-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm1
 ; AVX2-NEXT:    vpinsrw $4, 24(%rdi), %xmm0, %xmm0
 ; AVX2-NEXT:    vpinsrw $6, 28(%rdi), %xmm0, %xmm0
 ; AVX2-NEXT:    vpinsrw $7, 30(%rdi), %xmm0, %xmm0
+; AVX2-NEXT:    vmovd %eax, %xmm1
 ; AVX2-NEXT:    vpinsrw $3, 6(%rdi), %xmm1, %xmm1
 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: merge_16i16_i16_0uu3zzuuuuuzCuEF_volatile:
 ; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    movzwl (%rdi), %eax
 ; AVX512F-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-; AVX512F-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm1
 ; AVX512F-NEXT:    vpinsrw $4, 24(%rdi), %xmm0, %xmm0
 ; AVX512F-NEXT:    vpinsrw $6, 28(%rdi), %xmm0, %xmm0
 ; AVX512F-NEXT:    vpinsrw $7, 30(%rdi), %xmm0, %xmm0
+; AVX512F-NEXT:    vmovd %eax, %xmm1
 ; AVX512F-NEXT:    vpinsrw $3, 6(%rdi), %xmm1, %xmm1
 ; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
 ; AVX512F-NEXT:    retq
@@ -638,11 +641,12 @@ define <16 x i16> @merge_16i16_i16_0uu3zzuuuuuzCuEF_volatile(i16* %ptr) nounwind
 ; X32-AVX-LABEL: merge_16i16_i16_0uu3zzuuuuuzCuEF_volatile:
 ; X32-AVX:       # %bb.0:
 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-AVX-NEXT:    movzwl (%eax), %ecx
 ; X32-AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-; X32-AVX-NEXT:    vpinsrw $0, (%eax), %xmm0, %xmm1
 ; X32-AVX-NEXT:    vpinsrw $4, 24(%eax), %xmm0, %xmm0
 ; X32-AVX-NEXT:    vpinsrw $6, 28(%eax), %xmm0, %xmm0
 ; X32-AVX-NEXT:    vpinsrw $7, 30(%eax), %xmm0, %xmm0
+; X32-AVX-NEXT:    vmovd %ecx, %xmm1
 ; X32-AVX-NEXT:    vpinsrw $3, 6(%eax), %xmm1, %xmm1
 ; X32-AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; X32-AVX-NEXT:    retl

diff  --git a/llvm/test/CodeGen/X86/vec_cast.ll b/llvm/test/CodeGen/X86/vec_cast.ll
index 6e9a16768951..0a6bc2f59b68 100644
--- a/llvm/test/CodeGen/X86/vec_cast.ll
+++ b/llvm/test/CodeGen/X86/vec_cast.ll
@@ -87,8 +87,8 @@ define <8 x i32> @d(<8 x i16> %a) nounwind {
 define <3 x i32> @e(<3 x i16> %a) nounwind {
 ; CHECK-LIN-LABEL: e:
 ; CHECK-LIN:       # %bb.0:
-; CHECK-LIN-NEXT:    pxor %xmm0, %xmm0
-; CHECK-LIN-NEXT:    pinsrw $0, %edi, %xmm0
+; CHECK-LIN-NEXT:    movzwl %di, %eax
+; CHECK-LIN-NEXT:    movd %eax, %xmm0
 ; CHECK-LIN-NEXT:    pinsrw $2, %esi, %xmm0
 ; CHECK-LIN-NEXT:    pinsrw $4, %edx, %xmm0
 ; CHECK-LIN-NEXT:    retq
@@ -97,9 +97,8 @@ define <3 x i32> @e(<3 x i16> %a) nounwind {
 ; CHECK-WIN:       # %bb.0:
 ; CHECK-WIN-NEXT:    # kill: def $r8w killed $r8w def $r8d
 ; CHECK-WIN-NEXT:    # kill: def $dx killed $dx def $edx
-; CHECK-WIN-NEXT:    # kill: def $cx killed $cx def $ecx
-; CHECK-WIN-NEXT:    pxor %xmm0, %xmm0
-; CHECK-WIN-NEXT:    pinsrw $0, %ecx, %xmm0
+; CHECK-WIN-NEXT:    movzwl %cx, %eax
+; CHECK-WIN-NEXT:    movd %eax, %xmm0
 ; CHECK-WIN-NEXT:    pinsrw $2, %edx, %xmm0
 ; CHECK-WIN-NEXT:    pinsrw $4, %r8d, %xmm0
 ; CHECK-WIN-NEXT:    retq

diff  --git a/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll b/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll
index b36979b6ede2..fc953b409792 100644
--- a/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll
+++ b/llvm/test/CodeGen/X86/vector-mulfix-legalize.ll
@@ -77,8 +77,8 @@ define <4 x i16> @smulfixsat(<4 x i16> %a) {
 ; CHECK-NEXT:    cmovgl %r8d, %edi
 ; CHECK-NEXT:    cmpl $-16384, %eax # imm = 0xC000
 ; CHECK-NEXT:    cmovll %ecx, %edi
-; CHECK-NEXT:    pxor %xmm1, %xmm1
-; CHECK-NEXT:    pinsrw $0, %edi, %xmm1
+; CHECK-NEXT:    movzwl %di, %eax
+; CHECK-NEXT:    movd %eax, %xmm1
 ; CHECK-NEXT:    pinsrw $1, %esi, %xmm1
 ; CHECK-NEXT:    pinsrw $2, %edx, %xmm1
 ; CHECK-NEXT:    pextrw $3, %xmm0, %eax
@@ -126,8 +126,8 @@ define <4 x i16> @umulfixsat(<4 x i16> %a) {
 ; CHECK-NEXT:    movl $32767, %edx # imm = 0x7FFF
 ; CHECK-NEXT:    negl %edx
 ; CHECK-NEXT:    cmoval %eax, %esi
-; CHECK-NEXT:    pxor %xmm1, %xmm1
-; CHECK-NEXT:    pinsrw $0, %esi, %xmm1
+; CHECK-NEXT:    movzwl %si, %edx
+; CHECK-NEXT:    movd %edx, %xmm1
 ; CHECK-NEXT:    pinsrw $1, %edi, %xmm1
 ; CHECK-NEXT:    pinsrw $2, %ecx, %xmm1
 ; CHECK-NEXT:    pextrw $3, %xmm0, %ecx

diff  --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
index 3453f24ae961..8b3843d160c9 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -2446,16 +2446,16 @@ define <16 x i8> @PR31364(i8* nocapture readonly %a, i8* nocapture readonly %b)
 ;
 ; SSE41-LABEL: PR31364:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    pxor %xmm0, %xmm0
-; SSE41-NEXT:    pinsrb $0, (%rdi), %xmm0
+; SSE41-NEXT:    movzbl (%rdi), %eax
+; SSE41-NEXT:    movd %eax, %xmm0
 ; SSE41-NEXT:    pinsrb $1, (%rsi), %xmm0
 ; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1],zero,xmm0[1,1,1,1,1,0,0,0]
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: PR31364:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vpinsrb $0, (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    movzbl (%rdi), %eax
+; AVX-NEXT:    vmovd %eax, %xmm0
 ; AVX-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm0
 ; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1],zero,xmm0[1,1,1,1,1,0,0,0]
 ; AVX-NEXT:    retq

diff  --git a/llvm/test/CodeGen/X86/vector-shuffle-variable-128.ll b/llvm/test/CodeGen/X86/vector-shuffle-variable-128.ll
index e4f773aeb365..957a3c544fd4 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-variable-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-variable-128.ll
@@ -1342,15 +1342,15 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xyxyxy00_i16(<8 x i16> %x, <8 x i16> %
 ; SSE41-NEXT:    # kill: def $esi killed $esi def $rsi
 ; SSE41-NEXT:    # kill: def $edi killed $edi def $rdi
 ; SSE41-NEXT:    andl $7, %edi
+; SSE41-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE41-NEXT:    movzwl -40(%rsp,%rdi,2), %eax
 ; SSE41-NEXT:    andl $7, %esi
 ; SSE41-NEXT:    andl $7, %edx
 ; SSE41-NEXT:    andl $7, %ecx
-; SSE41-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; SSE41-NEXT:    andl $7, %r8d
 ; SSE41-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
 ; SSE41-NEXT:    andl $7, %r9d
-; SSE41-NEXT:    pxor %xmm0, %xmm0
-; SSE41-NEXT:    pinsrw $0, -40(%rsp,%rdi,2), %xmm0
+; SSE41-NEXT:    movd %eax, %xmm0
 ; SSE41-NEXT:    pinsrw $1, -24(%rsp,%rsi,2), %xmm0
 ; SSE41-NEXT:    pinsrw $2, -40(%rsp,%rdx,2), %xmm0
 ; SSE41-NEXT:    pinsrw $3, -24(%rsp,%rcx,2), %xmm0
@@ -1367,15 +1367,15 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xyxyxy00_i16(<8 x i16> %x, <8 x i16> %
 ; AVX-NEXT:    # kill: def $esi killed $esi def $rsi
 ; AVX-NEXT:    # kill: def $edi killed $edi def $rdi
 ; AVX-NEXT:    andl $7, %edi
+; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    movzwl -40(%rsp,%rdi,2), %eax
 ; AVX-NEXT:    andl $7, %esi
 ; AVX-NEXT:    andl $7, %edx
 ; AVX-NEXT:    andl $7, %ecx
-; AVX-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
 ; AVX-NEXT:    andl $7, %r8d
 ; AVX-NEXT:    vmovaps %xmm1, -{{[0-9]+}}(%rsp)
 ; AVX-NEXT:    andl $7, %r9d
-; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vpinsrw $0, -40(%rsp,%rdi,2), %xmm0, %xmm0
+; AVX-NEXT:    vmovd %eax, %xmm0
 ; AVX-NEXT:    vpinsrw $1, -24(%rsp,%rsi,2), %xmm0, %xmm0
 ; AVX-NEXT:    vpinsrw $2, -40(%rsp,%rdx,2), %xmm0, %xmm0
 ; AVX-NEXT:    vpinsrw $3, -24(%rsp,%rcx,2), %xmm0, %xmm0
