[llvm] r273253 - [AVX512] Add patterns for any-extending a mask that use the def of KMOVW/KMOVB without going through an EXTRACT_SUBREG and a MOVZX.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 21 00:37:33 PDT 2016


Author: ctopper
Date: Tue Jun 21 02:37:32 2016
New Revision: 273253

URL: http://llvm.org/viewvc/llvm-project?rev=273253&view=rev
Log:
[AVX512] Add patterns for any-extending a mask that use the def of KMOVW/KMOVB without going through an EXTRACT_SUBREG and a MOVZX.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
    llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
    llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll
    llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=273253&r1=273252&r2=273253&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue Jun 21 02:37:32 2016
@@ -1997,6 +1997,8 @@ let Predicates = [HasDQI] in {
             (EXTRACT_SUBREG (KMOVBrk VK8:$src), sub_8bit)>;
   def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
             (KMOVBrk VK8:$src)>;
+  def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
+            (KMOVBrk VK8:$src)>;
 }
 let Predicates = [HasAVX512] in {
   def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
@@ -2005,6 +2007,8 @@ let Predicates = [HasAVX512] in {
             (EXTRACT_SUBREG (KMOVWrk VK16:$src), sub_16bit)>;
   def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
             (KMOVWrk VK16:$src)>;
+  def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
+            (KMOVWrk VK16:$src)>;
 }
 let Predicates = [HasBWI] in {
   def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), (KMOVDkr GR32:$src)>;
@@ -2154,6 +2158,8 @@ let Predicates = [HasAVX512, NoDQI] in {
               sub_8bit)>;
   def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
             (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16))>;
+  def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
+            (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16))>;
 }
 
 let Predicates = [HasAVX512] in {

Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=273253&r1=273252&r2=273253&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Tue Jun 21 02:37:32 2016
@@ -974,29 +974,29 @@ define <8 x i16> @test_cmp_d_512(<16 x i
 ; CHECK-LABEL: test_cmp_d_512:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %r8d
-; CHECK-NEXT:    vpcmpltd %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %r9d
-; CHECK-NEXT:    vpcmpled %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %r10d
-; CHECK-NEXT:    vpcmpunordd %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %esi
-; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %edi
-; CHECK-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %eax
-; CHECK-NEXT:    vpcmpnled %zmm1, %zmm0, %k0
+; CHECK-NEXT:    vpcmpltd %zmm1, %zmm0, %k1
+; CHECK-NEXT:    vpcmpled %zmm1, %zmm0, %k2
+; CHECK-NEXT:    vpcmpunordd %zmm1, %zmm0, %k3
+; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k4
+; CHECK-NEXT:    vpcmpnltd %zmm1, %zmm0, %k5
+; CHECK-NEXT:    vpcmpnled %zmm1, %zmm0, %k6
+; CHECK-NEXT:    vpcmpordd %zmm1, %zmm0, %k7
+; CHECK-NEXT:    kmovw %k1, %eax
 ; CHECK-NEXT:    kmovw %k0, %ecx
-; CHECK-NEXT:    vpcmpordd %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %edx
-; CHECK-NEXT:    vmovd %r8d, %xmm0
-; CHECK-NEXT:    vpinsrw $1, %r9d, %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrw $2, %r10d, %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrw $3, %esi, %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrw $4, %edi, %xmm0, %xmm0
+; CHECK-NEXT:    vmovd %ecx, %xmm0
+; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k2, %eax
+; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k3, %eax
+; CHECK-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k4, %eax
+; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k5, %eax
 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrw $7, %edx, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k6, %eax
+; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k7, %eax
+; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
@@ -1022,29 +1022,29 @@ define <8 x i16> @test_mask_cmp_d_512(<1
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %edi, %k1
 ; CHECK-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %r8d
-; CHECK-NEXT:    vpcmpltd %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %r9d
-; CHECK-NEXT:    vpcmpled %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %r10d
-; CHECK-NEXT:    vpcmpunordd %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %esi
-; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %edi
-; CHECK-NEXT:    vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %eax
-; CHECK-NEXT:    vpcmpnled %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT:    vpcmpltd %zmm1, %zmm0, %k2 {%k1}
+; CHECK-NEXT:    vpcmpled %zmm1, %zmm0, %k3 {%k1}
+; CHECK-NEXT:    vpcmpunordd %zmm1, %zmm0, %k4 {%k1}
+; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k5 {%k1}
+; CHECK-NEXT:    vpcmpnltd %zmm1, %zmm0, %k6 {%k1}
+; CHECK-NEXT:    vpcmpnled %zmm1, %zmm0, %k7 {%k1}
+; CHECK-NEXT:    vpcmpordd %zmm1, %zmm0, %k1 {%k1}
+; CHECK-NEXT:    kmovw %k2, %eax
 ; CHECK-NEXT:    kmovw %k0, %ecx
-; CHECK-NEXT:    vpcmpordd %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %edx
-; CHECK-NEXT:    vmovd %r8d, %xmm0
-; CHECK-NEXT:    vpinsrw $1, %r9d, %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrw $2, %r10d, %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrw $3, %esi, %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrw $4, %edi, %xmm0, %xmm0
+; CHECK-NEXT:    vmovd %ecx, %xmm0
+; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k3, %eax
+; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k4, %eax
+; CHECK-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k5, %eax
+; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k6, %eax
 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrw $7, %edx, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k7, %eax
+; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k1, %eax
+; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
@@ -1071,29 +1071,29 @@ define <8 x i16> @test_ucmp_d_512(<16 x
 ; CHECK-LABEL: test_ucmp_d_512:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpcmpequd %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %r8d
-; CHECK-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %r9d
-; CHECK-NEXT:    vpcmpleud %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %r10d
-; CHECK-NEXT:    vpcmpunordud %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %esi
-; CHECK-NEXT:    vpcmpnequd %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %edi
-; CHECK-NEXT:    vpcmpnltud %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %eax
-; CHECK-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
+; CHECK-NEXT:    vpcmpltud %zmm1, %zmm0, %k1
+; CHECK-NEXT:    vpcmpleud %zmm1, %zmm0, %k2
+; CHECK-NEXT:    vpcmpunordud %zmm1, %zmm0, %k3
+; CHECK-NEXT:    vpcmpnequd %zmm1, %zmm0, %k4
+; CHECK-NEXT:    vpcmpnltud %zmm1, %zmm0, %k5
+; CHECK-NEXT:    vpcmpnleud %zmm1, %zmm0, %k6
+; CHECK-NEXT:    vpcmpordud %zmm1, %zmm0, %k7
+; CHECK-NEXT:    kmovw %k1, %eax
 ; CHECK-NEXT:    kmovw %k0, %ecx
-; CHECK-NEXT:    vpcmpordud %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %edx
-; CHECK-NEXT:    vmovd %r8d, %xmm0
-; CHECK-NEXT:    vpinsrw $1, %r9d, %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrw $2, %r10d, %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrw $3, %esi, %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrw $4, %edi, %xmm0, %xmm0
+; CHECK-NEXT:    vmovd %ecx, %xmm0
+; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k2, %eax
+; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k3, %eax
+; CHECK-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k4, %eax
+; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k5, %eax
 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrw $7, %edx, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k6, %eax
+; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k7, %eax
+; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
@@ -1119,29 +1119,29 @@ define <8 x i16> @test_mask_ucmp_d_512(<
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %edi, %k1
 ; CHECK-NEXT:    vpcmpequd %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %r8d
-; CHECK-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %r9d
-; CHECK-NEXT:    vpcmpleud %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %r10d
-; CHECK-NEXT:    vpcmpunordud %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %esi
-; CHECK-NEXT:    vpcmpnequd %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %edi
-; CHECK-NEXT:    vpcmpnltud %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %eax
-; CHECK-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT:    vpcmpltud %zmm1, %zmm0, %k2 {%k1}
+; CHECK-NEXT:    vpcmpleud %zmm1, %zmm0, %k3 {%k1}
+; CHECK-NEXT:    vpcmpunordud %zmm1, %zmm0, %k4 {%k1}
+; CHECK-NEXT:    vpcmpnequd %zmm1, %zmm0, %k5 {%k1}
+; CHECK-NEXT:    vpcmpnltud %zmm1, %zmm0, %k6 {%k1}
+; CHECK-NEXT:    vpcmpnleud %zmm1, %zmm0, %k7 {%k1}
+; CHECK-NEXT:    vpcmpordud %zmm1, %zmm0, %k1 {%k1}
+; CHECK-NEXT:    kmovw %k2, %eax
 ; CHECK-NEXT:    kmovw %k0, %ecx
-; CHECK-NEXT:    vpcmpordud %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %edx
-; CHECK-NEXT:    vmovd %r8d, %xmm0
-; CHECK-NEXT:    vpinsrw $1, %r9d, %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrw $2, %r10d, %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrw $3, %esi, %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrw $4, %edi, %xmm0, %xmm0
+; CHECK-NEXT:    vmovd %ecx, %xmm0
+; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k3, %eax
+; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k4, %eax
+; CHECK-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k5, %eax
+; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k6, %eax
 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0
-; CHECK-NEXT:    vpinsrw $7, %edx, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k7, %eax
+; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k1, %eax
+; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
@@ -1168,36 +1168,28 @@ define <8 x i8> @test_cmp_q_512(<8 x i64
 ; CHECK-LABEL: test_cmp_q_512:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %r8d
-; CHECK-NEXT:    vpcmpltq %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %r9d
-; CHECK-NEXT:    vpcmpleq %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %r10d
-; CHECK-NEXT:    vpcmpunordq %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %r11d
-; CHECK-NEXT:    vpcmpneqq %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %edi
-; CHECK-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
+; CHECK-NEXT:    vpcmpltq %zmm1, %zmm0, %k1
+; CHECK-NEXT:    vpcmpleq %zmm1, %zmm0, %k2
+; CHECK-NEXT:    vpcmpunordq %zmm1, %zmm0, %k3
+; CHECK-NEXT:    vpcmpneqq %zmm1, %zmm0, %k4
+; CHECK-NEXT:    vpcmpnltq %zmm1, %zmm0, %k5
+; CHECK-NEXT:    vpcmpnleq %zmm1, %zmm0, %k6
+; CHECK-NEXT:    vpcmpordq %zmm1, %zmm0, %k7
 ; CHECK-NEXT:    kmovw %k0, %eax
-; CHECK-NEXT:    vpcmpnleq %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %ecx
-; CHECK-NEXT:    vpcmpordq %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %edx
-; CHECK-NEXT:    movzbl %r8b, %esi
-; CHECK-NEXT:    vpinsrb $0, %esi, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %r9b, %esi
-; CHECK-NEXT:    vpinsrb $2, %esi, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %r10b, %esi
-; CHECK-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %r11b, %esi
-; CHECK-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %dil, %esi
-; CHECK-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k1, %eax
+; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k2, %eax
+; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k3, %eax
+; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k4, %eax
+; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k5, %eax
 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %cl, %eax
+; CHECK-NEXT:    kmovw %k6, %eax
 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %dl, %eax
+; CHECK-NEXT:    kmovw %k7, %eax
 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
@@ -1224,36 +1216,28 @@ define <8 x i8> @test_mask_cmp_q_512(<8
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %edi, %k1
 ; CHECK-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %r8d
-; CHECK-NEXT:    vpcmpltq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %r9d
-; CHECK-NEXT:    vpcmpleq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %r10d
-; CHECK-NEXT:    vpcmpunordq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %r11d
-; CHECK-NEXT:    vpcmpneqq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %edi
-; CHECK-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT:    vpcmpltq %zmm1, %zmm0, %k2 {%k1}
+; CHECK-NEXT:    vpcmpleq %zmm1, %zmm0, %k3 {%k1}
+; CHECK-NEXT:    vpcmpunordq %zmm1, %zmm0, %k4 {%k1}
+; CHECK-NEXT:    vpcmpneqq %zmm1, %zmm0, %k5 {%k1}
+; CHECK-NEXT:    vpcmpnltq %zmm1, %zmm0, %k6 {%k1}
+; CHECK-NEXT:    vpcmpnleq %zmm1, %zmm0, %k7 {%k1}
+; CHECK-NEXT:    vpcmpordq %zmm1, %zmm0, %k1 {%k1}
 ; CHECK-NEXT:    kmovw %k0, %eax
-; CHECK-NEXT:    vpcmpnleq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %ecx
-; CHECK-NEXT:    vpcmpordq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %edx
-; CHECK-NEXT:    movzbl %r8b, %esi
-; CHECK-NEXT:    vpinsrb $0, %esi, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %r9b, %esi
-; CHECK-NEXT:    vpinsrb $2, %esi, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %r10b, %esi
-; CHECK-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %r11b, %esi
-; CHECK-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %dil, %esi
-; CHECK-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k2, %eax
+; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k3, %eax
+; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k4, %eax
+; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k5, %eax
+; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k6, %eax
 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %cl, %eax
+; CHECK-NEXT:    kmovw %k7, %eax
 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %dl, %eax
+; CHECK-NEXT:    kmovw %k1, %eax
 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
@@ -1281,36 +1265,28 @@ define <8 x i8> @test_ucmp_q_512(<8 x i6
 ; CHECK-LABEL: test_ucmp_q_512:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpcmpequq %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %r8d
-; CHECK-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %r9d
-; CHECK-NEXT:    vpcmpleuq %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %r10d
-; CHECK-NEXT:    vpcmpunorduq %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %r11d
-; CHECK-NEXT:    vpcmpnequq %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %edi
-; CHECK-NEXT:    vpcmpnltuq %zmm1, %zmm0, %k0
+; CHECK-NEXT:    vpcmpltuq %zmm1, %zmm0, %k1
+; CHECK-NEXT:    vpcmpleuq %zmm1, %zmm0, %k2
+; CHECK-NEXT:    vpcmpunorduq %zmm1, %zmm0, %k3
+; CHECK-NEXT:    vpcmpnequq %zmm1, %zmm0, %k4
+; CHECK-NEXT:    vpcmpnltuq %zmm1, %zmm0, %k5
+; CHECK-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k6
+; CHECK-NEXT:    vpcmporduq %zmm1, %zmm0, %k7
 ; CHECK-NEXT:    kmovw %k0, %eax
-; CHECK-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %ecx
-; CHECK-NEXT:    vpcmporduq %zmm1, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %edx
-; CHECK-NEXT:    movzbl %r8b, %esi
-; CHECK-NEXT:    vpinsrb $0, %esi, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %r9b, %esi
-; CHECK-NEXT:    vpinsrb $2, %esi, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %r10b, %esi
-; CHECK-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %r11b, %esi
-; CHECK-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %dil, %esi
-; CHECK-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k1, %eax
+; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k2, %eax
+; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k3, %eax
+; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k4, %eax
+; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k5, %eax
 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %cl, %eax
+; CHECK-NEXT:    kmovw %k6, %eax
 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %dl, %eax
+; CHECK-NEXT:    kmovw %k7, %eax
 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
@@ -1337,36 +1313,28 @@ define <8 x i8> @test_mask_ucmp_q_512(<8
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %edi, %k1
 ; CHECK-NEXT:    vpcmpequq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %r8d
-; CHECK-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %r9d
-; CHECK-NEXT:    vpcmpleuq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %r10d
-; CHECK-NEXT:    vpcmpunorduq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %r11d
-; CHECK-NEXT:    vpcmpnequq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %edi
-; CHECK-NEXT:    vpcmpnltuq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT:    vpcmpltuq %zmm1, %zmm0, %k2 {%k1}
+; CHECK-NEXT:    vpcmpleuq %zmm1, %zmm0, %k3 {%k1}
+; CHECK-NEXT:    vpcmpunorduq %zmm1, %zmm0, %k4 {%k1}
+; CHECK-NEXT:    vpcmpnequq %zmm1, %zmm0, %k5 {%k1}
+; CHECK-NEXT:    vpcmpnltuq %zmm1, %zmm0, %k6 {%k1}
+; CHECK-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k7 {%k1}
+; CHECK-NEXT:    vpcmporduq %zmm1, %zmm0, %k1 {%k1}
 ; CHECK-NEXT:    kmovw %k0, %eax
-; CHECK-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %ecx
-; CHECK-NEXT:    vpcmporduq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT:    kmovw %k0, %edx
-; CHECK-NEXT:    movzbl %r8b, %esi
-; CHECK-NEXT:    vpinsrb $0, %esi, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %r9b, %esi
-; CHECK-NEXT:    vpinsrb $2, %esi, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %r10b, %esi
-; CHECK-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %r11b, %esi
-; CHECK-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %dil, %esi
-; CHECK-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k2, %eax
+; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k3, %eax
+; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k4, %eax
+; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k5, %eax
+; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
+; CHECK-NEXT:    kmovw %k6, %eax
 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %cl, %eax
+; CHECK-NEXT:    kmovw %k7, %eax
 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
-; CHECK-NEXT:    movzbl %dl, %eax
+; CHECK-NEXT:    kmovw %k1, %eax
 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)

Modified: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll?rev=273253&r1=273252&r2=273253&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll Tue Jun 21 02:37:32 2016
@@ -205,29 +205,29 @@ define <8 x i16> @test_cmp_w_256(<16 x i
 ; CHECK-LABEL: test_cmp_w_256:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmplew %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunordw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %esi ## encoding: [0xc5,0xf8,0x93,0xf0]
-; CHECK-NEXT:    vpcmpneqw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnlew %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmpordw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    vmovd %r8d, %xmm0 ## encoding: [0x62,0xd1,0x7d,0x08,0x6e,0xc0]
-; CHECK-NEXT:    vpinsrw $1, %r9d, %xmm0, %xmm0 ## encoding: [0x62,0xd1,0x7d,0x08,0xc4,0xc1,0x01]
-; CHECK-NEXT:    vpinsrw $2, %r10d, %xmm0, %xmm0 ## encoding: [0x62,0xd1,0x7d,0x08,0xc4,0xc2,0x02]
-; CHECK-NEXT:    vpinsrw $3, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc6,0x03]
-; CHECK-NEXT:    vpinsrw $4, %edi, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc7,0x04]
+; CHECK-NEXT:    vpcmpltw %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xc9,0x01]
+; CHECK-NEXT:    vpcmplew %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd1,0x02]
+; CHECK-NEXT:    vpcmpunordw %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd9,0x03]
+; CHECK-NEXT:    vpcmpneqw %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xe1,0x04]
+; CHECK-NEXT:    vpcmpnltw %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xe9,0x05]
+; CHECK-NEXT:    vpcmpnlew %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xf1,0x06]
+; CHECK-NEXT:    vpcmpordw %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xf9,0x07]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT:    vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1]
+; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05]
-; CHECK-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc1,0x06]
-; CHECK-NEXT:    vpinsrw $7, %edx, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc2,0x07]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
+; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
+; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
@@ -253,29 +253,29 @@ define <8 x i16> @test_mask_cmp_w_256(<1
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmplew %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunordw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %esi ## encoding: [0xc5,0xf8,0x93,0xf0]
-; CHECK-NEXT:    vpcmpneqw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnlew %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmpordw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    vmovd %r8d, %xmm0 ## encoding: [0x62,0xd1,0x7d,0x08,0x6e,0xc0]
-; CHECK-NEXT:    vpinsrw $1, %r9d, %xmm0, %xmm0 ## encoding: [0x62,0xd1,0x7d,0x08,0xc4,0xc1,0x01]
-; CHECK-NEXT:    vpinsrw $2, %r10d, %xmm0, %xmm0 ## encoding: [0x62,0xd1,0x7d,0x08,0xc4,0xc2,0x02]
-; CHECK-NEXT:    vpinsrw $3, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc6,0x03]
-; CHECK-NEXT:    vpinsrw $4, %edi, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc7,0x04]
+; CHECK-NEXT:    vpcmpltw %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xd1,0x01]
+; CHECK-NEXT:    vpcmplew %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xd9,0x02]
+; CHECK-NEXT:    vpcmpunordw %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x03]
+; CHECK-NEXT:    vpcmpneqw %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe9,0x04]
+; CHECK-NEXT:    vpcmpnltw %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xf1,0x05]
+; CHECK-NEXT:    vpcmpnlew %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xf9,0x06]
+; CHECK-NEXT:    vpcmpordw %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xc9,0x07]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT:    vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1]
+; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05]
-; CHECK-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc1,0x06]
-; CHECK-NEXT:    vpinsrw $7, %edx, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc2,0x07]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
+; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
@@ -302,29 +302,29 @@ define <8 x i16> @test_ucmp_w_256(<16 x
 ; CHECK-LABEL: test_ucmp_w_256:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpcmpequw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltuw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmpleuw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunorduw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %esi ## encoding: [0xc5,0xf8,0x93,0xf0]
-; CHECK-NEXT:    vpcmpnequw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltuw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnleuw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmporduw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    vmovd %r8d, %xmm0 ## encoding: [0x62,0xd1,0x7d,0x08,0x6e,0xc0]
-; CHECK-NEXT:    vpinsrw $1, %r9d, %xmm0, %xmm0 ## encoding: [0x62,0xd1,0x7d,0x08,0xc4,0xc1,0x01]
-; CHECK-NEXT:    vpinsrw $2, %r10d, %xmm0, %xmm0 ## encoding: [0x62,0xd1,0x7d,0x08,0xc4,0xc2,0x02]
-; CHECK-NEXT:    vpinsrw $3, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc6,0x03]
-; CHECK-NEXT:    vpinsrw $4, %edi, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc7,0x04]
+; CHECK-NEXT:    vpcmpltuw %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xc9,0x01]
+; CHECK-NEXT:    vpcmpleuw %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xd1,0x02]
+; CHECK-NEXT:    vpcmpunorduw %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xd9,0x03]
+; CHECK-NEXT:    vpcmpnequw %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe1,0x04]
+; CHECK-NEXT:    vpcmpnltuw %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe9,0x05]
+; CHECK-NEXT:    vpcmpnleuw %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xf1,0x06]
+; CHECK-NEXT:    vpcmporduw %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xf9,0x07]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT:    vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1]
+; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05]
-; CHECK-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc1,0x06]
-; CHECK-NEXT:    vpinsrw $7, %edx, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc2,0x07]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
+; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
+; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
@@ -350,29 +350,29 @@ define <8 x i16> @test_mask_ucmp_w_256(<
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT:    vpcmpequw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltuw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmpleuw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunorduw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %esi ## encoding: [0xc5,0xf8,0x93,0xf0]
-; CHECK-NEXT:    vpcmpnequw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltuw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnleuw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmporduw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    vmovd %r8d, %xmm0 ## encoding: [0x62,0xd1,0x7d,0x08,0x6e,0xc0]
-; CHECK-NEXT:    vpinsrw $1, %r9d, %xmm0, %xmm0 ## encoding: [0x62,0xd1,0x7d,0x08,0xc4,0xc1,0x01]
-; CHECK-NEXT:    vpinsrw $2, %r10d, %xmm0, %xmm0 ## encoding: [0x62,0xd1,0x7d,0x08,0xc4,0xc2,0x02]
-; CHECK-NEXT:    vpinsrw $3, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc6,0x03]
-; CHECK-NEXT:    vpinsrw $4, %edi, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc7,0x04]
+; CHECK-NEXT:    vpcmpltuw %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd1,0x01]
+; CHECK-NEXT:    vpcmpleuw %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd9,0x02]
+; CHECK-NEXT:    vpcmpunorduw %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xe1,0x03]
+; CHECK-NEXT:    vpcmpnequw %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xe9,0x04]
+; CHECK-NEXT:    vpcmpnltuw %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xf1,0x05]
+; CHECK-NEXT:    vpcmpnleuw %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xf9,0x06]
+; CHECK-NEXT:    vpcmporduw %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc9,0x07]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT:    vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1]
+; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05]
-; CHECK-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc1,0x06]
-; CHECK-NEXT:    vpinsrw $7, %edx, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc2,0x07]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
+; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
@@ -493,29 +493,29 @@ define <8 x i16> @test_cmp_b_128(<16 x i
 ; CHECK-LABEL: test_cmp_b_128:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmpleb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunordb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %esi ## encoding: [0xc5,0xf8,0x93,0xf0]
-; CHECK-NEXT:    vpcmpneqb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnleb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmpordb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    vmovd %r8d, %xmm0 ## encoding: [0x62,0xd1,0x7d,0x08,0x6e,0xc0]
-; CHECK-NEXT:    vpinsrw $1, %r9d, %xmm0, %xmm0 ## encoding: [0x62,0xd1,0x7d,0x08,0xc4,0xc1,0x01]
-; CHECK-NEXT:    vpinsrw $2, %r10d, %xmm0, %xmm0 ## encoding: [0x62,0xd1,0x7d,0x08,0xc4,0xc2,0x02]
-; CHECK-NEXT:    vpinsrw $3, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc6,0x03]
-; CHECK-NEXT:    vpinsrw $4, %edi, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc7,0x04]
+; CHECK-NEXT:    vpcmpltb %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xc9,0x01]
+; CHECK-NEXT:    vpcmpleb %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd1,0x02]
+; CHECK-NEXT:    vpcmpunordb %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd9,0x03]
+; CHECK-NEXT:    vpcmpneqb %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xe1,0x04]
+; CHECK-NEXT:    vpcmpnltb %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xe9,0x05]
+; CHECK-NEXT:    vpcmpnleb %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xf1,0x06]
+; CHECK-NEXT:    vpcmpordb %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xf9,0x07]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT:    vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1]
+; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05]
-; CHECK-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc1,0x06]
-; CHECK-NEXT:    vpinsrw $7, %edx, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc2,0x07]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
+; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
+; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
@@ -541,29 +541,29 @@ define <8 x i16> @test_mask_cmp_b_128(<1
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmpleb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunordb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %esi ## encoding: [0xc5,0xf8,0x93,0xf0]
-; CHECK-NEXT:    vpcmpneqb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnleb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmpordb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    vmovd %r8d, %xmm0 ## encoding: [0x62,0xd1,0x7d,0x08,0x6e,0xc0]
-; CHECK-NEXT:    vpinsrw $1, %r9d, %xmm0, %xmm0 ## encoding: [0x62,0xd1,0x7d,0x08,0xc4,0xc1,0x01]
-; CHECK-NEXT:    vpinsrw $2, %r10d, %xmm0, %xmm0 ## encoding: [0x62,0xd1,0x7d,0x08,0xc4,0xc2,0x02]
-; CHECK-NEXT:    vpinsrw $3, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc6,0x03]
-; CHECK-NEXT:    vpinsrw $4, %edi, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc7,0x04]
+; CHECK-NEXT:    vpcmpltb %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xd1,0x01]
+; CHECK-NEXT:    vpcmpleb %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xd9,0x02]
+; CHECK-NEXT:    vpcmpunordb %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x03]
+; CHECK-NEXT:    vpcmpneqb %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe9,0x04]
+; CHECK-NEXT:    vpcmpnltb %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xf1,0x05]
+; CHECK-NEXT:    vpcmpnleb %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xf9,0x06]
+; CHECK-NEXT:    vpcmpordb %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xc9,0x07]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT:    vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1]
+; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05]
-; CHECK-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc1,0x06]
-; CHECK-NEXT:    vpinsrw $7, %edx, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc2,0x07]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
+; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask)
   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
@@ -590,29 +590,29 @@ define <8 x i16> @test_ucmp_b_128(<16 x
 ; CHECK-LABEL: test_ucmp_b_128:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpcmpequb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltub %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmpleub %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunordub %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %esi ## encoding: [0xc5,0xf8,0x93,0xf0]
-; CHECK-NEXT:    vpcmpnequb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltub %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnleub %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmpordub %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    vmovd %r8d, %xmm0 ## encoding: [0x62,0xd1,0x7d,0x08,0x6e,0xc0]
-; CHECK-NEXT:    vpinsrw $1, %r9d, %xmm0, %xmm0 ## encoding: [0x62,0xd1,0x7d,0x08,0xc4,0xc1,0x01]
-; CHECK-NEXT:    vpinsrw $2, %r10d, %xmm0, %xmm0 ## encoding: [0x62,0xd1,0x7d,0x08,0xc4,0xc2,0x02]
-; CHECK-NEXT:    vpinsrw $3, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc6,0x03]
-; CHECK-NEXT:    vpinsrw $4, %edi, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc7,0x04]
+; CHECK-NEXT:    vpcmpltub %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xc9,0x01]
+; CHECK-NEXT:    vpcmpleub %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xd1,0x02]
+; CHECK-NEXT:    vpcmpunordub %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xd9,0x03]
+; CHECK-NEXT:    vpcmpnequb %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe1,0x04]
+; CHECK-NEXT:    vpcmpnltub %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe9,0x05]
+; CHECK-NEXT:    vpcmpnleub %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xf1,0x06]
+; CHECK-NEXT:    vpcmpordub %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xf9,0x07]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT:    vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1]
+; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05]
-; CHECK-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc1,0x06]
-; CHECK-NEXT:    vpinsrw $7, %edx, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc2,0x07]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
+; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
+; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
@@ -638,29 +638,29 @@ define <8 x i16> @test_mask_ucmp_b_128(<
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT:    vpcmpequb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltub %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmpleub %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunordub %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %esi ## encoding: [0xc5,0xf8,0x93,0xf0]
-; CHECK-NEXT:    vpcmpnequb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltub %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnleub %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmpordub %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    vmovd %r8d, %xmm0 ## encoding: [0x62,0xd1,0x7d,0x08,0x6e,0xc0]
-; CHECK-NEXT:    vpinsrw $1, %r9d, %xmm0, %xmm0 ## encoding: [0x62,0xd1,0x7d,0x08,0xc4,0xc1,0x01]
-; CHECK-NEXT:    vpinsrw $2, %r10d, %xmm0, %xmm0 ## encoding: [0x62,0xd1,0x7d,0x08,0xc4,0xc2,0x02]
-; CHECK-NEXT:    vpinsrw $3, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc6,0x03]
-; CHECK-NEXT:    vpinsrw $4, %edi, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc7,0x04]
+; CHECK-NEXT:    vpcmpltub %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd1,0x01]
+; CHECK-NEXT:    vpcmpleub %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd9,0x02]
+; CHECK-NEXT:    vpcmpunordub %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xe1,0x03]
+; CHECK-NEXT:    vpcmpnequb %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xe9,0x04]
+; CHECK-NEXT:    vpcmpnltub %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xf1,0x05]
+; CHECK-NEXT:    vpcmpnleub %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xf9,0x06]
+; CHECK-NEXT:    vpcmpordub %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc9,0x07]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
+; CHECK-NEXT:    vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1]
+; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05]
-; CHECK-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc1,0x06]
-; CHECK-NEXT:    vpinsrw $7, %edx, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc2,0x07]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
+; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask)
   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
@@ -687,36 +687,28 @@ define <8 x i8> @test_cmp_w_128(<8 x i16
 ; CHECK-LABEL: test_cmp_w_128:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmplew %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunordw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %r11d ## encoding: [0xc5,0x78,0x93,0xd8]
-; CHECK-NEXT:    vpcmpneqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnlew %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmpordw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    movzbl %r8b, %esi ## encoding: [0x41,0x0f,0xb6,0xf0]
-; CHECK-NEXT:    vpinsrb $0, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc6,0x00]
-; CHECK-NEXT:    movzbl %r9b, %esi ## encoding: [0x41,0x0f,0xb6,0xf1]
-; CHECK-NEXT:    vpinsrb $2, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc6,0x02]
-; CHECK-NEXT:    movzbl %r10b, %esi ## encoding: [0x41,0x0f,0xb6,0xf2]
-; CHECK-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc6,0x04]
-; CHECK-NEXT:    movzbl %r11b, %esi ## encoding: [0x41,0x0f,0xb6,0xf3]
-; CHECK-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc6,0x06]
-; CHECK-NEXT:    movzbl %dil, %esi ## encoding: [0x40,0x0f,0xb6,0xf7]
-; CHECK-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc6,0x08]
-; CHECK-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vpcmpltw %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc9,0x01]
+; CHECK-NEXT:    vpcmplew %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd1,0x02]
+; CHECK-NEXT:    vpcmpunordw %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd9,0x03]
+; CHECK-NEXT:    vpcmpneqw %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xe1,0x04]
+; CHECK-NEXT:    vpcmpnltw %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xe9,0x05]
+; CHECK-NEXT:    vpcmpnlew %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xf1,0x06]
+; CHECK-NEXT:    vpcmpordw %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xf9,0x07]
+; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x00]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x08]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0a]
-; CHECK-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0c]
-; CHECK-NEXT:    movzbl %dl, %eax ## encoding: [0x0f,0xb6,0xc2]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0e]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
@@ -743,36 +735,28 @@ define <8 x i8> @test_mask_cmp_w_128(<8
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmplew %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunordw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %r11d ## encoding: [0xc5,0x78,0x93,0xd8]
-; CHECK-NEXT:    vpcmpneqw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnlew %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmpordw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    movzbl %r8b, %esi ## encoding: [0x41,0x0f,0xb6,0xf0]
-; CHECK-NEXT:    vpinsrb $0, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc6,0x00]
-; CHECK-NEXT:    movzbl %r9b, %esi ## encoding: [0x41,0x0f,0xb6,0xf1]
-; CHECK-NEXT:    vpinsrb $2, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc6,0x02]
-; CHECK-NEXT:    movzbl %r10b, %esi ## encoding: [0x41,0x0f,0xb6,0xf2]
-; CHECK-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc6,0x04]
-; CHECK-NEXT:    movzbl %r11b, %esi ## encoding: [0x41,0x0f,0xb6,0xf3]
-; CHECK-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc6,0x06]
-; CHECK-NEXT:    movzbl %dil, %esi ## encoding: [0x40,0x0f,0xb6,0xf7]
-; CHECK-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc6,0x08]
-; CHECK-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vpcmpltw %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd1,0x01]
+; CHECK-NEXT:    vpcmplew %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x02]
+; CHECK-NEXT:    vpcmpunordw %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x03]
+; CHECK-NEXT:    vpcmpneqw %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe9,0x04]
+; CHECK-NEXT:    vpcmpnltw %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xf1,0x05]
+; CHECK-NEXT:    vpcmpnlew %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xf9,0x06]
+; CHECK-NEXT:    vpcmpordw %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xc9,0x07]
+; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x00]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x08]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0a]
-; CHECK-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0c]
-; CHECK-NEXT:    movzbl %dl, %eax ## encoding: [0x0f,0xb6,0xc2]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0e]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask)
@@ -800,36 +784,28 @@ define <8 x i8> @test_ucmp_w_128(<8 x i1
 ; CHECK-LABEL: test_ucmp_w_128:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpcmpequw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltuw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmpleuw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunorduw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %r11d ## encoding: [0xc5,0x78,0x93,0xd8]
-; CHECK-NEXT:    vpcmpnequw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltuw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnleuw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmporduw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    movzbl %r8b, %esi ## encoding: [0x41,0x0f,0xb6,0xf0]
-; CHECK-NEXT:    vpinsrb $0, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc6,0x00]
-; CHECK-NEXT:    movzbl %r9b, %esi ## encoding: [0x41,0x0f,0xb6,0xf1]
-; CHECK-NEXT:    vpinsrb $2, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc6,0x02]
-; CHECK-NEXT:    movzbl %r10b, %esi ## encoding: [0x41,0x0f,0xb6,0xf2]
-; CHECK-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc6,0x04]
-; CHECK-NEXT:    movzbl %r11b, %esi ## encoding: [0x41,0x0f,0xb6,0xf3]
-; CHECK-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc6,0x06]
-; CHECK-NEXT:    movzbl %dil, %esi ## encoding: [0x40,0x0f,0xb6,0xf7]
-; CHECK-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc6,0x08]
-; CHECK-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vpcmpltuw %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc9,0x01]
+; CHECK-NEXT:    vpcmpleuw %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xd1,0x02]
+; CHECK-NEXT:    vpcmpunorduw %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xd9,0x03]
+; CHECK-NEXT:    vpcmpnequw %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xe1,0x04]
+; CHECK-NEXT:    vpcmpnltuw %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xe9,0x05]
+; CHECK-NEXT:    vpcmpnleuw %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xf1,0x06]
+; CHECK-NEXT:    vpcmporduw %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xf9,0x07]
+; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x00]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x08]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0a]
-; CHECK-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0c]
-; CHECK-NEXT:    movzbl %dl, %eax ## encoding: [0x0f,0xb6,0xc2]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0e]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
@@ -856,36 +832,28 @@ define <8 x i8> @test_mask_ucmp_w_128(<8
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT:    vpcmpequw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltuw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmpleuw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunorduw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %r11d ## encoding: [0xc5,0x78,0x93,0xd8]
-; CHECK-NEXT:    vpcmpnequw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltuw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnleuw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmporduw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    movzbl %r8b, %esi ## encoding: [0x41,0x0f,0xb6,0xf0]
-; CHECK-NEXT:    vpinsrb $0, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc6,0x00]
-; CHECK-NEXT:    movzbl %r9b, %esi ## encoding: [0x41,0x0f,0xb6,0xf1]
-; CHECK-NEXT:    vpinsrb $2, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc6,0x02]
-; CHECK-NEXT:    movzbl %r10b, %esi ## encoding: [0x41,0x0f,0xb6,0xf2]
-; CHECK-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc6,0x04]
-; CHECK-NEXT:    movzbl %r11b, %esi ## encoding: [0x41,0x0f,0xb6,0xf3]
-; CHECK-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc6,0x06]
-; CHECK-NEXT:    movzbl %dil, %esi ## encoding: [0x40,0x0f,0xb6,0xf7]
-; CHECK-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc6,0x08]
-; CHECK-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vpcmpltuw %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd1,0x01]
+; CHECK-NEXT:    vpcmpleuw %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd9,0x02]
+; CHECK-NEXT:    vpcmpunorduw %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe1,0x03]
+; CHECK-NEXT:    vpcmpnequw %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe9,0x04]
+; CHECK-NEXT:    vpcmpnltuw %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xf1,0x05]
+; CHECK-NEXT:    vpcmpnleuw %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xf9,0x06]
+; CHECK-NEXT:    vpcmporduw %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc9,0x07]
+; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x00]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x08]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0a]
-; CHECK-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0c]
-; CHECK-NEXT:    movzbl %dl, %eax ## encoding: [0x0f,0xb6,0xc2]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0e]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask)

Modified: llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll?rev=273253&r1=273252&r2=273253&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll Tue Jun 21 02:37:32 2016
@@ -469,9 +469,9 @@ define i16 at test_int_x86_avx512_mask_fpcl
 ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_512:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %edi, %k1
-; CHECK-NEXT:    vfpclassps $4, %zmm0, %k0
-; CHECK-NEXT:    kmovw %k0, %ecx
 ; CHECK-NEXT:    vfpclassps $4, %zmm0, %k0 {%k1}
+; CHECK-NEXT:    kmovw %k0, %ecx
+; CHECK-NEXT:    vfpclassps $4, %zmm0, %k0
 ; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    addl %ecx, %eax
 ; CHECK-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll?rev=273253&r1=273252&r2=273253&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll Tue Jun 21 02:37:32 2016
@@ -8,36 +8,28 @@ define <8 x i8> @test_cmp_d_256(<8 x i32
 ; CHECK-LABEL: test_cmp_d_256:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmpled %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunordd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %r11d ## encoding: [0xc5,0x78,0x93,0xd8]
-; CHECK-NEXT:    vpcmpneqd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnled %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmpordd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    movzbl %r8b, %esi ## encoding: [0x41,0x0f,0xb6,0xf0]
-; CHECK-NEXT:    vpinsrb $0, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x00]
-; CHECK-NEXT:    movzbl %r9b, %esi ## encoding: [0x41,0x0f,0xb6,0xf1]
-; CHECK-NEXT:    vpinsrb $2, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x02]
-; CHECK-NEXT:    movzbl %r10b, %esi ## encoding: [0x41,0x0f,0xb6,0xf2]
-; CHECK-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04]
-; CHECK-NEXT:    movzbl %r11b, %esi ## encoding: [0x41,0x0f,0xb6,0xf3]
-; CHECK-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x06]
-; CHECK-NEXT:    movzbl %dil, %esi ## encoding: [0x40,0x0f,0xb6,0xf7]
-; CHECK-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x08]
-; CHECK-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vpcmpltd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x01]
+; CHECK-NEXT:    vpcmpled %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd1,0x02]
+; CHECK-NEXT:    vpcmpunordd %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd9,0x03]
+; CHECK-NEXT:    vpcmpneqd %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xe1,0x04]
+; CHECK-NEXT:    vpcmpnltd %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xe9,0x05]
+; CHECK-NEXT:    vpcmpnled %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xf1,0x06]
+; CHECK-NEXT:    vpcmpordd %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xf9,0x07]
+; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT:    movzbl %dl, %eax ## encoding: [0x0f,0xb6,0xc2]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
@@ -64,36 +56,28 @@ define <8 x i8> @test_mask_cmp_d_256(<8
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmpled %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunordd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %r11d ## encoding: [0xc5,0x78,0x93,0xd8]
-; CHECK-NEXT:    vpcmpneqd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnled %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmpordd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    movzbl %r8b, %esi ## encoding: [0x41,0x0f,0xb6,0xf0]
-; CHECK-NEXT:    vpinsrb $0, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x00]
-; CHECK-NEXT:    movzbl %r9b, %esi ## encoding: [0x41,0x0f,0xb6,0xf1]
-; CHECK-NEXT:    vpinsrb $2, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x02]
-; CHECK-NEXT:    movzbl %r10b, %esi ## encoding: [0x41,0x0f,0xb6,0xf2]
-; CHECK-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04]
-; CHECK-NEXT:    movzbl %r11b, %esi ## encoding: [0x41,0x0f,0xb6,0xf3]
-; CHECK-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x06]
-; CHECK-NEXT:    movzbl %dil, %esi ## encoding: [0x40,0x0f,0xb6,0xf7]
-; CHECK-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x08]
-; CHECK-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vpcmpltd %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd1,0x01]
+; CHECK-NEXT:    vpcmpled %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x02]
+; CHECK-NEXT:    vpcmpunordd %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x03]
+; CHECK-NEXT:    vpcmpneqd %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe9,0x04]
+; CHECK-NEXT:    vpcmpnltd %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xf1,0x05]
+; CHECK-NEXT:    vpcmpnled %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xf9,0x06]
+; CHECK-NEXT:    vpcmpordd %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xc9,0x07]
+; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT:    movzbl %dl, %eax ## encoding: [0x0f,0xb6,0xc2]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
@@ -121,36 +105,28 @@ define <8 x i8> @test_ucmp_d_256(<8 x i3
 ; CHECK-LABEL: test_ucmp_d_256:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpcmpequd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltud %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmpleud %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunordud %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %r11d ## encoding: [0xc5,0x78,0x93,0xd8]
-; CHECK-NEXT:    vpcmpnequd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltud %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnleud %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmpordud %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    movzbl %r8b, %esi ## encoding: [0x41,0x0f,0xb6,0xf0]
-; CHECK-NEXT:    vpinsrb $0, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x00]
-; CHECK-NEXT:    movzbl %r9b, %esi ## encoding: [0x41,0x0f,0xb6,0xf1]
-; CHECK-NEXT:    vpinsrb $2, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x02]
-; CHECK-NEXT:    movzbl %r10b, %esi ## encoding: [0x41,0x0f,0xb6,0xf2]
-; CHECK-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04]
-; CHECK-NEXT:    movzbl %r11b, %esi ## encoding: [0x41,0x0f,0xb6,0xf3]
-; CHECK-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x06]
-; CHECK-NEXT:    movzbl %dil, %esi ## encoding: [0x40,0x0f,0xb6,0xf7]
-; CHECK-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x08]
-; CHECK-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vpcmpltud %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc9,0x01]
+; CHECK-NEXT:    vpcmpleud %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xd1,0x02]
+; CHECK-NEXT:    vpcmpunordud %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xd9,0x03]
+; CHECK-NEXT:    vpcmpnequd %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xe1,0x04]
+; CHECK-NEXT:    vpcmpnltud %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xe9,0x05]
+; CHECK-NEXT:    vpcmpnleud %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xf1,0x06]
+; CHECK-NEXT:    vpcmpordud %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xf9,0x07]
+; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT:    movzbl %dl, %eax ## encoding: [0x0f,0xb6,0xc2]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
@@ -177,36 +153,28 @@ define <8 x i8> @test_mask_ucmp_d_256(<8
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT:    vpcmpequd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltud %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmpleud %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunordud %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %r11d ## encoding: [0xc5,0x78,0x93,0xd8]
-; CHECK-NEXT:    vpcmpnequd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltud %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnleud %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmpordud %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    movzbl %r8b, %esi ## encoding: [0x41,0x0f,0xb6,0xf0]
-; CHECK-NEXT:    vpinsrb $0, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x00]
-; CHECK-NEXT:    movzbl %r9b, %esi ## encoding: [0x41,0x0f,0xb6,0xf1]
-; CHECK-NEXT:    vpinsrb $2, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x02]
-; CHECK-NEXT:    movzbl %r10b, %esi ## encoding: [0x41,0x0f,0xb6,0xf2]
-; CHECK-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04]
-; CHECK-NEXT:    movzbl %r11b, %esi ## encoding: [0x41,0x0f,0xb6,0xf3]
-; CHECK-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x06]
-; CHECK-NEXT:    movzbl %dil, %esi ## encoding: [0x40,0x0f,0xb6,0xf7]
-; CHECK-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x08]
-; CHECK-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vpcmpltud %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd1,0x01]
+; CHECK-NEXT:    vpcmpleud %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd9,0x02]
+; CHECK-NEXT:    vpcmpunordud %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe1,0x03]
+; CHECK-NEXT:    vpcmpnequd %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe9,0x04]
+; CHECK-NEXT:    vpcmpnltud %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xf1,0x05]
+; CHECK-NEXT:    vpcmpnleud %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xf9,0x06]
+; CHECK-NEXT:    vpcmpordud %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc9,0x07]
+; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT:    movzbl %dl, %eax ## encoding: [0x0f,0xb6,0xc2]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
@@ -234,36 +202,28 @@ define <8 x i8> @test_cmp_q_256(<4 x i64
 ; CHECK-LABEL: test_cmp_q_256:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmpleq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunordq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %r11d ## encoding: [0xc5,0x78,0x93,0xd8]
-; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnleq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmpordq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    movzbl %r8b, %esi ## encoding: [0x41,0x0f,0xb6,0xf0]
-; CHECK-NEXT:    vpinsrb $0, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x00]
-; CHECK-NEXT:    movzbl %r9b, %esi ## encoding: [0x41,0x0f,0xb6,0xf1]
-; CHECK-NEXT:    vpinsrb $2, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x02]
-; CHECK-NEXT:    movzbl %r10b, %esi ## encoding: [0x41,0x0f,0xb6,0xf2]
-; CHECK-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04]
-; CHECK-NEXT:    movzbl %r11b, %esi ## encoding: [0x41,0x0f,0xb6,0xf3]
-; CHECK-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x06]
-; CHECK-NEXT:    movzbl %dil, %esi ## encoding: [0x40,0x0f,0xb6,0xf7]
-; CHECK-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x08]
-; CHECK-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vpcmpltq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x01]
+; CHECK-NEXT:    vpcmpleq %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd1,0x02]
+; CHECK-NEXT:    vpcmpunordq %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd9,0x03]
+; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xe1,0x04]
+; CHECK-NEXT:    vpcmpnltq %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xe9,0x05]
+; CHECK-NEXT:    vpcmpnleq %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xf1,0x06]
+; CHECK-NEXT:    vpcmpordq %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xf9,0x07]
+; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT:    movzbl %dl, %eax ## encoding: [0x0f,0xb6,0xc2]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1)
@@ -290,36 +250,28 @@ define <8 x i8> @test_mask_cmp_q_256(<4
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmpleq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunordq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %r11d ## encoding: [0xc5,0x78,0x93,0xd8]
-; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnleq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmpordq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    movzbl %r8b, %esi ## encoding: [0x41,0x0f,0xb6,0xf0]
-; CHECK-NEXT:    vpinsrb $0, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x00]
-; CHECK-NEXT:    movzbl %r9b, %esi ## encoding: [0x41,0x0f,0xb6,0xf1]
-; CHECK-NEXT:    vpinsrb $2, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x02]
-; CHECK-NEXT:    movzbl %r10b, %esi ## encoding: [0x41,0x0f,0xb6,0xf2]
-; CHECK-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04]
-; CHECK-NEXT:    movzbl %r11b, %esi ## encoding: [0x41,0x0f,0xb6,0xf3]
-; CHECK-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x06]
-; CHECK-NEXT:    movzbl %dil, %esi ## encoding: [0x40,0x0f,0xb6,0xf7]
-; CHECK-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x08]
-; CHECK-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vpcmpltq %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xd1,0x01]
+; CHECK-NEXT:    vpcmpleq %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xd9,0x02]
+; CHECK-NEXT:    vpcmpunordq %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe1,0x03]
+; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe9,0x04]
+; CHECK-NEXT:    vpcmpnltq %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xf1,0x05]
+; CHECK-NEXT:    vpcmpnleq %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xf9,0x06]
+; CHECK-NEXT:    vpcmpordq %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xc9,0x07]
+; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT:    movzbl %dl, %eax ## encoding: [0x0f,0xb6,0xc2]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
@@ -347,36 +299,28 @@ define <8 x i8> @test_ucmp_q_256(<4 x i6
 ; CHECK-LABEL: test_ucmp_q_256:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpcmpequq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltuq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmpleuq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunorduq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %r11d ## encoding: [0xc5,0x78,0x93,0xd8]
-; CHECK-NEXT:    vpcmpnequq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltuq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnleuq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmporduq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    movzbl %r8b, %esi ## encoding: [0x41,0x0f,0xb6,0xf0]
-; CHECK-NEXT:    vpinsrb $0, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x00]
-; CHECK-NEXT:    movzbl %r9b, %esi ## encoding: [0x41,0x0f,0xb6,0xf1]
-; CHECK-NEXT:    vpinsrb $2, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x02]
-; CHECK-NEXT:    movzbl %r10b, %esi ## encoding: [0x41,0x0f,0xb6,0xf2]
-; CHECK-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04]
-; CHECK-NEXT:    movzbl %r11b, %esi ## encoding: [0x41,0x0f,0xb6,0xf3]
-; CHECK-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x06]
-; CHECK-NEXT:    movzbl %dil, %esi ## encoding: [0x40,0x0f,0xb6,0xf7]
-; CHECK-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x08]
-; CHECK-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vpcmpltuq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc9,0x01]
+; CHECK-NEXT:    vpcmpleuq %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xd1,0x02]
+; CHECK-NEXT:    vpcmpunorduq %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xd9,0x03]
+; CHECK-NEXT:    vpcmpnequq %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe1,0x04]
+; CHECK-NEXT:    vpcmpnltuq %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe9,0x05]
+; CHECK-NEXT:    vpcmpnleuq %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xf1,0x06]
+; CHECK-NEXT:    vpcmporduq %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xf9,0x07]
+; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT:    movzbl %dl, %eax ## encoding: [0x0f,0xb6,0xc2]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1)
@@ -403,36 +347,28 @@ define <8 x i8> @test_mask_ucmp_q_256(<4
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT:    vpcmpequq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltuq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmpleuq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunorduq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %r11d ## encoding: [0xc5,0x78,0x93,0xd8]
-; CHECK-NEXT:    vpcmpnequq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltuq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnleuq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmporduq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    movzbl %r8b, %esi ## encoding: [0x41,0x0f,0xb6,0xf0]
-; CHECK-NEXT:    vpinsrb $0, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x00]
-; CHECK-NEXT:    movzbl %r9b, %esi ## encoding: [0x41,0x0f,0xb6,0xf1]
-; CHECK-NEXT:    vpinsrb $2, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x02]
-; CHECK-NEXT:    movzbl %r10b, %esi ## encoding: [0x41,0x0f,0xb6,0xf2]
-; CHECK-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04]
-; CHECK-NEXT:    movzbl %r11b, %esi ## encoding: [0x41,0x0f,0xb6,0xf3]
-; CHECK-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x06]
-; CHECK-NEXT:    movzbl %dil, %esi ## encoding: [0x40,0x0f,0xb6,0xf7]
-; CHECK-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x08]
-; CHECK-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vpcmpltuq %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xd1,0x01]
+; CHECK-NEXT:    vpcmpleuq %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xd9,0x02]
+; CHECK-NEXT:    vpcmpunorduq %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xe1,0x03]
+; CHECK-NEXT:    vpcmpnequq %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xe9,0x04]
+; CHECK-NEXT:    vpcmpnltuq %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xf1,0x05]
+; CHECK-NEXT:    vpcmpnleuq %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xf9,0x06]
+; CHECK-NEXT:    vpcmporduq %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xc9,0x07]
+; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT:    movzbl %dl, %eax ## encoding: [0x0f,0xb6,0xc2]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
@@ -462,36 +398,28 @@ define <8 x i8> @test_cmp_d_128(<4 x i32
 ; CHECK-LABEL: test_cmp_d_128:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmpled %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunordd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %r11d ## encoding: [0xc5,0x78,0x93,0xd8]
-; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnled %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmpordd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    movzbl %r8b, %esi ## encoding: [0x41,0x0f,0xb6,0xf0]
-; CHECK-NEXT:    vpinsrb $0, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x00]
-; CHECK-NEXT:    movzbl %r9b, %esi ## encoding: [0x41,0x0f,0xb6,0xf1]
-; CHECK-NEXT:    vpinsrb $2, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x02]
-; CHECK-NEXT:    movzbl %r10b, %esi ## encoding: [0x41,0x0f,0xb6,0xf2]
-; CHECK-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04]
-; CHECK-NEXT:    movzbl %r11b, %esi ## encoding: [0x41,0x0f,0xb6,0xf3]
-; CHECK-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x06]
-; CHECK-NEXT:    movzbl %dil, %esi ## encoding: [0x40,0x0f,0xb6,0xf7]
-; CHECK-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x08]
-; CHECK-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vpcmpltd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x01]
+; CHECK-NEXT:    vpcmpled %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd1,0x02]
+; CHECK-NEXT:    vpcmpunordd %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd9,0x03]
+; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xe1,0x04]
+; CHECK-NEXT:    vpcmpnltd %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xe9,0x05]
+; CHECK-NEXT:    vpcmpnled %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xf1,0x06]
+; CHECK-NEXT:    vpcmpordd %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xf9,0x07]
+; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT:    movzbl %dl, %eax ## encoding: [0x0f,0xb6,0xc2]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1)
@@ -518,36 +446,28 @@ define <8 x i8> @test_mask_cmp_d_128(<4
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmpled %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunordd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %r11d ## encoding: [0xc5,0x78,0x93,0xd8]
-; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnled %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmpordd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    movzbl %r8b, %esi ## encoding: [0x41,0x0f,0xb6,0xf0]
-; CHECK-NEXT:    vpinsrb $0, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x00]
-; CHECK-NEXT:    movzbl %r9b, %esi ## encoding: [0x41,0x0f,0xb6,0xf1]
-; CHECK-NEXT:    vpinsrb $2, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x02]
-; CHECK-NEXT:    movzbl %r10b, %esi ## encoding: [0x41,0x0f,0xb6,0xf2]
-; CHECK-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04]
-; CHECK-NEXT:    movzbl %r11b, %esi ## encoding: [0x41,0x0f,0xb6,0xf3]
-; CHECK-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x06]
-; CHECK-NEXT:    movzbl %dil, %esi ## encoding: [0x40,0x0f,0xb6,0xf7]
-; CHECK-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x08]
-; CHECK-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vpcmpltd %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xd1,0x01]
+; CHECK-NEXT:    vpcmpled %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xd9,0x02]
+; CHECK-NEXT:    vpcmpunordd %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xe1,0x03]
+; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xe9,0x04]
+; CHECK-NEXT:    vpcmpnltd %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xf1,0x05]
+; CHECK-NEXT:    vpcmpnled %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xf9,0x06]
+; CHECK-NEXT:    vpcmpordd %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xc9,0x07]
+; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT:    movzbl %dl, %eax ## encoding: [0x0f,0xb6,0xc2]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
@@ -575,36 +495,28 @@ define <8 x i8> @test_ucmp_d_128(<4 x i3
 ; CHECK-LABEL: test_ucmp_d_128:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpcmpequd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltud %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmpleud %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunordud %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %r11d ## encoding: [0xc5,0x78,0x93,0xd8]
-; CHECK-NEXT:    vpcmpnequd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltud %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnleud %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmpordud %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    movzbl %r8b, %esi ## encoding: [0x41,0x0f,0xb6,0xf0]
-; CHECK-NEXT:    vpinsrb $0, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x00]
-; CHECK-NEXT:    movzbl %r9b, %esi ## encoding: [0x41,0x0f,0xb6,0xf1]
-; CHECK-NEXT:    vpinsrb $2, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x02]
-; CHECK-NEXT:    movzbl %r10b, %esi ## encoding: [0x41,0x0f,0xb6,0xf2]
-; CHECK-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04]
-; CHECK-NEXT:    movzbl %r11b, %esi ## encoding: [0x41,0x0f,0xb6,0xf3]
-; CHECK-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x06]
-; CHECK-NEXT:    movzbl %dil, %esi ## encoding: [0x40,0x0f,0xb6,0xf7]
-; CHECK-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x08]
-; CHECK-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vpcmpltud %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc9,0x01]
+; CHECK-NEXT:    vpcmpleud %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xd1,0x02]
+; CHECK-NEXT:    vpcmpunordud %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xd9,0x03]
+; CHECK-NEXT:    vpcmpnequd %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe1,0x04]
+; CHECK-NEXT:    vpcmpnltud %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe9,0x05]
+; CHECK-NEXT:    vpcmpnleud %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xf1,0x06]
+; CHECK-NEXT:    vpcmpordud %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xf9,0x07]
+; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT:    movzbl %dl, %eax ## encoding: [0x0f,0xb6,0xc2]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1)
@@ -631,36 +543,28 @@ define <8 x i8> @test_mask_ucmp_d_128(<4
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT:    vpcmpequd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltud %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmpleud %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunordud %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %r11d ## encoding: [0xc5,0x78,0x93,0xd8]
-; CHECK-NEXT:    vpcmpnequd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltud %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnleud %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmpordud %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    movzbl %r8b, %esi ## encoding: [0x41,0x0f,0xb6,0xf0]
-; CHECK-NEXT:    vpinsrb $0, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x00]
-; CHECK-NEXT:    movzbl %r9b, %esi ## encoding: [0x41,0x0f,0xb6,0xf1]
-; CHECK-NEXT:    vpinsrb $2, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x02]
-; CHECK-NEXT:    movzbl %r10b, %esi ## encoding: [0x41,0x0f,0xb6,0xf2]
-; CHECK-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04]
-; CHECK-NEXT:    movzbl %r11b, %esi ## encoding: [0x41,0x0f,0xb6,0xf3]
-; CHECK-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x06]
-; CHECK-NEXT:    movzbl %dil, %esi ## encoding: [0x40,0x0f,0xb6,0xf7]
-; CHECK-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x08]
-; CHECK-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vpcmpltud %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xd1,0x01]
+; CHECK-NEXT:    vpcmpleud %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xd9,0x02]
+; CHECK-NEXT:    vpcmpunordud %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xe1,0x03]
+; CHECK-NEXT:    vpcmpnequd %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xe9,0x04]
+; CHECK-NEXT:    vpcmpnltud %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xf1,0x05]
+; CHECK-NEXT:    vpcmpnleud %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xf9,0x06]
+; CHECK-NEXT:    vpcmpordud %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xc9,0x07]
+; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT:    movzbl %dl, %eax ## encoding: [0x0f,0xb6,0xc2]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
@@ -688,36 +592,28 @@ define <8 x i8> @test_cmp_q_128(<2 x i64
 ; CHECK-LABEL: test_cmp_q_128:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmpleq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunordq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %r11d ## encoding: [0xc5,0x78,0x93,0xd8]
-; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnleq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmpordq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    movzbl %r8b, %esi ## encoding: [0x41,0x0f,0xb6,0xf0]
-; CHECK-NEXT:    vpinsrb $0, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x00]
-; CHECK-NEXT:    movzbl %r9b, %esi ## encoding: [0x41,0x0f,0xb6,0xf1]
-; CHECK-NEXT:    vpinsrb $2, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x02]
-; CHECK-NEXT:    movzbl %r10b, %esi ## encoding: [0x41,0x0f,0xb6,0xf2]
-; CHECK-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04]
-; CHECK-NEXT:    movzbl %r11b, %esi ## encoding: [0x41,0x0f,0xb6,0xf3]
-; CHECK-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x06]
-; CHECK-NEXT:    movzbl %dil, %esi ## encoding: [0x40,0x0f,0xb6,0xf7]
-; CHECK-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x08]
-; CHECK-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vpcmpltq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x01]
+; CHECK-NEXT:    vpcmpleq %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd1,0x02]
+; CHECK-NEXT:    vpcmpunordq %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd9,0x03]
+; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xe1,0x04]
+; CHECK-NEXT:    vpcmpnltq %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xe9,0x05]
+; CHECK-NEXT:    vpcmpnleq %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xf1,0x06]
+; CHECK-NEXT:    vpcmpordq %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xf9,0x07]
+; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT:    movzbl %dl, %eax ## encoding: [0x0f,0xb6,0xc2]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1)
@@ -744,36 +640,28 @@ define <8 x i8> @test_mask_cmp_q_128(<2
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmpleq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunordq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %r11d ## encoding: [0xc5,0x78,0x93,0xd8]
-; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnleq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmpordq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    movzbl %r8b, %esi ## encoding: [0x41,0x0f,0xb6,0xf0]
-; CHECK-NEXT:    vpinsrb $0, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x00]
-; CHECK-NEXT:    movzbl %r9b, %esi ## encoding: [0x41,0x0f,0xb6,0xf1]
-; CHECK-NEXT:    vpinsrb $2, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x02]
-; CHECK-NEXT:    movzbl %r10b, %esi ## encoding: [0x41,0x0f,0xb6,0xf2]
-; CHECK-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04]
-; CHECK-NEXT:    movzbl %r11b, %esi ## encoding: [0x41,0x0f,0xb6,0xf3]
-; CHECK-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x06]
-; CHECK-NEXT:    movzbl %dil, %esi ## encoding: [0x40,0x0f,0xb6,0xf7]
-; CHECK-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x08]
-; CHECK-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vpcmpltq %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xd1,0x01]
+; CHECK-NEXT:    vpcmpleq %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xd9,0x02]
+; CHECK-NEXT:    vpcmpunordq %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe1,0x03]
+; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe9,0x04]
+; CHECK-NEXT:    vpcmpnltq %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xf1,0x05]
+; CHECK-NEXT:    vpcmpnleq %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xf9,0x06]
+; CHECK-NEXT:    vpcmpordq %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xc9,0x07]
+; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT:    movzbl %dl, %eax ## encoding: [0x0f,0xb6,0xc2]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
@@ -801,36 +689,28 @@ define <8 x i8> @test_ucmp_q_128(<2 x i6
 ; CHECK-LABEL: test_ucmp_q_128:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpcmpequq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltuq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmpleuq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunorduq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %r11d ## encoding: [0xc5,0x78,0x93,0xd8]
-; CHECK-NEXT:    vpcmpnequq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltuq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmporduq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    movzbl %r8b, %esi ## encoding: [0x41,0x0f,0xb6,0xf0]
-; CHECK-NEXT:    vpinsrb $0, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x00]
-; CHECK-NEXT:    movzbl %r9b, %esi ## encoding: [0x41,0x0f,0xb6,0xf1]
-; CHECK-NEXT:    vpinsrb $2, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x02]
-; CHECK-NEXT:    movzbl %r10b, %esi ## encoding: [0x41,0x0f,0xb6,0xf2]
-; CHECK-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04]
-; CHECK-NEXT:    movzbl %r11b, %esi ## encoding: [0x41,0x0f,0xb6,0xf3]
-; CHECK-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x06]
-; CHECK-NEXT:    movzbl %dil, %esi ## encoding: [0x40,0x0f,0xb6,0xf7]
-; CHECK-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x08]
-; CHECK-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vpcmpltuq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc9,0x01]
+; CHECK-NEXT:    vpcmpleuq %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xd1,0x02]
+; CHECK-NEXT:    vpcmpunorduq %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xd9,0x03]
+; CHECK-NEXT:    vpcmpnequq %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe1,0x04]
+; CHECK-NEXT:    vpcmpnltuq %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe9,0x05]
+; CHECK-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xf1,0x06]
+; CHECK-NEXT:    vpcmporduq %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xf9,0x07]
+; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT:    movzbl %dl, %eax ## encoding: [0x0f,0xb6,0xc2]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1)
@@ -857,36 +737,28 @@ define <8 x i8> @test_mask_ucmp_q_128(<2
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
 ; CHECK-NEXT:    vpcmpequq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xc1,0x00]
-; CHECK-NEXT:    kmovw %k0, %r8d ## encoding: [0xc5,0x78,0x93,0xc0]
-; CHECK-NEXT:    vpcmpltuq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xc1,0x01]
-; CHECK-NEXT:    kmovw %k0, %r9d ## encoding: [0xc5,0x78,0x93,0xc8]
-; CHECK-NEXT:    vpcmpleuq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xc1,0x02]
-; CHECK-NEXT:    kmovw %k0, %r10d ## encoding: [0xc5,0x78,0x93,0xd0]
-; CHECK-NEXT:    vpcmpunorduq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xc1,0x03]
-; CHECK-NEXT:    kmovw %k0, %r11d ## encoding: [0xc5,0x78,0x93,0xd8]
-; CHECK-NEXT:    vpcmpnequq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xc1,0x04]
-; CHECK-NEXT:    kmovw %k0, %edi ## encoding: [0xc5,0xf8,0x93,0xf8]
-; CHECK-NEXT:    vpcmpnltuq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xc1,0x05]
-; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xc1,0x06]
-; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT:    vpcmporduq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xc1,0x07]
-; CHECK-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
-; CHECK-NEXT:    movzbl %r8b, %esi ## encoding: [0x41,0x0f,0xb6,0xf0]
-; CHECK-NEXT:    vpinsrb $0, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x00]
-; CHECK-NEXT:    movzbl %r9b, %esi ## encoding: [0x41,0x0f,0xb6,0xf1]
-; CHECK-NEXT:    vpinsrb $2, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x02]
-; CHECK-NEXT:    movzbl %r10b, %esi ## encoding: [0x41,0x0f,0xb6,0xf2]
-; CHECK-NEXT:    vpinsrb $4, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x04]
-; CHECK-NEXT:    movzbl %r11b, %esi ## encoding: [0x41,0x0f,0xb6,0xf3]
-; CHECK-NEXT:    vpinsrb $6, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x06]
-; CHECK-NEXT:    movzbl %dil, %esi ## encoding: [0x40,0x0f,0xb6,0xf7]
-; CHECK-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc6,0x08]
-; CHECK-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
+; CHECK-NEXT:    vpcmpltuq %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd1,0x01]
+; CHECK-NEXT:    vpcmpleuq %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd9,0x02]
+; CHECK-NEXT:    vpcmpunorduq %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xe1,0x03]
+; CHECK-NEXT:    vpcmpnequq %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xe9,0x04]
+; CHECK-NEXT:    vpcmpnltuq %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xf1,0x05]
+; CHECK-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xf9,0x06]
+; CHECK-NEXT:    vpcmporduq %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xc9,0x07]
+; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00]
+; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
+; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
+; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT:    movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT:    movzbl %dl, %eax ## encoding: [0x0f,0xb6,0xc2]
+; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)




More information about the llvm-commits mailing list