[llvm] r323175 - [X86] Don't reorder (srl (and X, C1), C2) if (and X, C1) can be matched as a movzx

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 22 21:45:52 PST 2018


Author: ctopper
Date: Mon Jan 22 21:45:52 2018
New Revision: 323175

URL: http://llvm.org/viewvc/llvm-project?rev=323175&view=rev
Log:
[X86] Don't reorder (srl (and X, C1), C2) if (and X, C1) can be matched as a movzx

Summary:
If we can match as a zero extend there's no need to flip the order to get an encoding benefit, as movzx is 3 bytes with independent source/dest registers. The shortest 'and' we could make is also 3 bytes unless we get lucky in the register allocator and it's on AL/AX/EAX, which have a 2-byte encoding.

This patch was more impressive before r322957 went in. It removed some of the same Ands that got deleted by that patch.

Reviewers: spatel, RKSimon

Reviewed By: spatel

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D42313

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
    llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
    llvm/trunk/test/CodeGen/X86/zext-demanded.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=323175&r1=323174&r2=323175&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Jan 22 21:45:52 2018
@@ -33110,6 +33110,14 @@ static SDValue combineShiftRightLogical(
   // transform should reduce code size. It may also enable secondary transforms
   // from improved known-bits analysis or instruction selection.
   APInt MaskVal = AndC->getAPIntValue();
+
+  // If this can be matched by a zero extend, don't optimize.
+  if (MaskVal.isMask()) {
+    unsigned TO = MaskVal.countTrailingOnes();
+    if (TO >= 8 && isPowerOf2_32(TO))
+      return SDValue();
+  }
+
   APInt NewMaskVal = MaskVal.lshr(ShiftC->getAPIntValue());
   unsigned OldMaskSize = MaskVal.getMinSignedBits();
   unsigned NewMaskSize = NewMaskVal.getMinSignedBits();

Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll?rev=323175&r1=323174&r2=323175&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll Mon Jan 22 21:45:52 2018
@@ -112,21 +112,21 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    .cfi_def_cfa_offset 12
 ; X32-NEXT:    .cfi_offset %esi, -12
 ; X32-NEXT:    .cfi_offset %ebx, -8
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    kmovd %ecx, %k0
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    kmovd %eax, %k0
 ; X32-NEXT:    kshiftrq $1, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    andb $2, %cl
+; X32-NEXT:    shrb %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $62, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $2, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movl %eax, %edx
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    andb $15, %cl
+; X32-NEXT:    movl %ecx, %edx
 ; X32-NEXT:    shrb $2, %dl
 ; X32-NEXT:    kmovd %edx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
@@ -134,127 +134,124 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    kshiftrq $61, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $3, %k0, %k1
-; X32-NEXT:    shrb $3, %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    shrb $3, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $60, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $4, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $4, %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $4, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $59, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $5, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $5, %al
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $5, %cl
+; X32-NEXT:    andb $1, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $58, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $6, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $6, %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $6, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $57, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $7, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $7, %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $7, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $56, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $8, %k0, %k1
-; X32-NEXT:    movb %ch, %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movb %ah, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $55, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $9, %k0, %k1
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    andb $2, %cl
+; X32-NEXT:    shrb %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $54, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $10, %k0, %k1
-; X32-NEXT:    movb %ch, %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movl %eax, %edx
+; X32-NEXT:    movb %ah, %cl
+; X32-NEXT:    movl %eax, %esi
+; X32-NEXT:    andb $15, %cl
+; X32-NEXT:    movl %ecx, %edx
 ; X32-NEXT:    shrb $2, %dl
 ; X32-NEXT:    kmovd %edx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
+; X32-NEXT:    shrb $3, %cl
+; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movzwl %si, %edx
+; X32-NEXT:    movl %edx, %ecx
+; X32-NEXT:    shrl $12, %ecx
+; X32-NEXT:    kmovd %ecx, %k3
+; X32-NEXT:    movl %esi, %ecx
+; X32-NEXT:    shrl $13, %ecx
+; X32-NEXT:    andb $1, %cl
+; X32-NEXT:    kmovd %ecx, %k4
+; X32-NEXT:    movl %edx, %ecx
+; X32-NEXT:    shrl $14, %ecx
+; X32-NEXT:    kmovd %ecx, %k5
+; X32-NEXT:    shrl $16, %eax
+; X32-NEXT:    movl %eax, %ebx
+; X32-NEXT:    andb $2, %bl
+; X32-NEXT:    shrb %bl
+; X32-NEXT:    kmovd %ebx, %k6
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    andb $15, %cl
+; X32-NEXT:    movl %ecx, %ebx
+; X32-NEXT:    shrb $2, %bl
+; X32-NEXT:    kmovd %ebx, %k7
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $53, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $11, %k0, %k1
-; X32-NEXT:    shrb $3, %al
-; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrl $12, %eax
-; X32-NEXT:    andl $15, %eax
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrl $13, %eax
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k3
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrl $14, %eax
-; X32-NEXT:    andl $3, %eax
-; X32-NEXT:    kmovd %eax, %k4
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrl $15, %eax
-; X32-NEXT:    andl $1, %eax
-; X32-NEXT:    kmovd %eax, %k5
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrl $16, %edx
-; X32-NEXT:    movl %edx, %eax
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k6
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    andb $15, %bl
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrb $2, %al
-; X32-NEXT:    kmovd %eax, %k7
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $52, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $12, %k0, %k1
-; X32-NEXT:    kxorq %k2, %k1, %k1
+; X32-NEXT:    kxorq %k3, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $51, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $13, %k0, %k1
-; X32-NEXT:    kxorq %k3, %k1, %k1
+; X32-NEXT:    kxorq %k4, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $50, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $14, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
+; X32-NEXT:    kxorq %k5, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $49, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $15, %k0, %k1
-; X32-NEXT:    kxorq %k5, %k1, %k1
+; X32-NEXT:    shrl $15, %edx
+; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $48, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $16, %k0, %k1
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $47, %k1, %k1
@@ -270,46 +267,46 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    kshiftrq $45, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $19, %k0, %k1
-; X32-NEXT:    shrb $3, %bl
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    shrb $3, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $44, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $20, %k0, %k1
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    shrb $4, %bl
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $4, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $43, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $21, %k0, %k1
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    shrb $5, %bl
-; X32-NEXT:    andb $1, %bl
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $5, %cl
+; X32-NEXT:    andb $1, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $42, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $22, %k0, %k1
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    shrb $6, %bl
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $6, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $41, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $23, %k0, %k1
-; X32-NEXT:    shrb $7, %dl
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    shrb $7, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $40, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $24, %k0, %k1
-; X32-NEXT:    movl %ecx, %edx
+; X32-NEXT:    movl %esi, %edx
 ; X32-NEXT:    shrl $24, %edx
 ; X32-NEXT:    kmovd %edx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
@@ -317,19 +314,19 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    kshiftrq $39, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $25, %k0, %k1
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    andb $2, %bl
-; X32-NEXT:    shrb %bl
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    movl %edx, %eax
+; X32-NEXT:    andb $2, %al
+; X32-NEXT:    shrb %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $38, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $26, %k0, %k1
 ; X32-NEXT:    andb $15, %dl
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    shrb $2, %bl
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    movl %edx, %eax
+; X32-NEXT:    shrb $2, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $37, %k1, %k1
@@ -342,26 +339,27 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    kshiftrq $36, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $28, %k0, %k1
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrl $28, %edx
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    movl %esi, %ecx
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrl $28, %eax
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $35, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $29, %k0, %k1
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrl $29, %edx
-; X32-NEXT:    andb $1, %dl
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrl $29, %eax
+; X32-NEXT:    andb $1, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $34, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $30, %k0, %k1
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrl $30, %edx
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrl $30, %eax
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $33, %k1, %k1
@@ -374,99 +372,99 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    kshiftrq $32, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $32, %k0, %k1
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    kmovd %ebx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $31, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $33, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andb $2, %cl
-; X32-NEXT:    shrb %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    andb $2, %al
+; X32-NEXT:    shrb %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $30, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $34, %k0, %k1
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    andb $15, %al
 ; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    shrb $2, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $29, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $35, %k0, %k1
-; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    shrb $3, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $28, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $36, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $4, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrb $4, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $27, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $37, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $5, %cl
-; X32-NEXT:    andb $1, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrb $5, %al
+; X32-NEXT:    andb $1, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $26, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $38, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $6, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrb $6, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $25, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $39, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $7, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrb $7, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $24, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $40, %k0, %k1
-; X32-NEXT:    movb %ah, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movb %bh, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    andb $2, %cl
-; X32-NEXT:    shrb %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    movb %ah, %cl
-; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k3
-; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k4
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrl $13, %ecx
-; X32-NEXT:    andb $1, %cl
-; X32-NEXT:    kmovd %ecx, %k5
+; X32-NEXT:    andb $2, %al
+; X32-NEXT:    shrb %al
+; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movb %bh, %al
+; X32-NEXT:    andb $15, %al
 ; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $2, %cl
+; X32-NEXT:    kmovd %ecx, %k3
+; X32-NEXT:    shrb $3, %al
+; X32-NEXT:    kmovd %eax, %k4
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrl $13, %eax
+; X32-NEXT:    andb $1, %al
+; X32-NEXT:    kmovd %eax, %k5
+; X32-NEXT:    movl %ebx, %ecx
 ; X32-NEXT:    shrl $16, %ecx
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    andb $2, %dl
-; X32-NEXT:    shrb %dl
-; X32-NEXT:    kmovd %edx, %k6
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    andb $2, %al
+; X32-NEXT:    shrb %al
+; X32-NEXT:    kmovd %eax, %k6
 ; X32-NEXT:    movl %ecx, %edx
 ; X32-NEXT:    andb $15, %dl
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    shrb $2, %bl
-; X32-NEXT:    kmovd %ebx, %k7
+; X32-NEXT:    movl %edx, %eax
+; X32-NEXT:    shrb $2, %al
+; X32-NEXT:    kmovd %eax, %k7
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $23, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
@@ -477,23 +475,20 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $42, %k0, %k1
 ; X32-NEXT:    kxorq %k3, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $21, %k1, %k1
-; X32-NEXT:    kxorq %k0, %k1, %k0
-; X32-NEXT:    kshiftrq $43, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
+; X32-NEXT:    movzwl %bx, %eax
 ; X32-NEXT:    movl %eax, %esi
 ; X32-NEXT:    shrl $12, %esi
-; X32-NEXT:    andl $15, %esi
 ; X32-NEXT:    kmovd %esi, %k2
 ; X32-NEXT:    movl %eax, %esi
 ; X32-NEXT:    shrl $14, %esi
-; X32-NEXT:    andl $3, %esi
 ; X32-NEXT:    kmovd %esi, %k3
-; X32-NEXT:    movl %eax, %esi
-; X32-NEXT:    shrl $15, %esi
-; X32-NEXT:    andl $1, %esi
-; X32-NEXT:    kmovd %esi, %k4
+; X32-NEXT:    kshiftlq $63, %k1, %k1
+; X32-NEXT:    kshiftrq $21, %k1, %k1
+; X32-NEXT:    kxorq %k0, %k1, %k0
+; X32-NEXT:    kshiftrq $43, %k0, %k1
+; X32-NEXT:    kxorq %k4, %k1, %k1
+; X32-NEXT:    shrl $15, %eax
+; X32-NEXT:    kmovd %eax, %k4
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $20, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
@@ -541,30 +536,30 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    kshiftrq $12, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k4
 ; X32-NEXT:    kshiftrq $52, %k4, %k0
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $4, %dl
-; X32-NEXT:    kmovd %edx, %k1
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrb $4, %al
+; X32-NEXT:    kmovd %eax, %k1
 ; X32-NEXT:    kxorq %k1, %k0, %k5
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $5, %dl
-; X32-NEXT:    andb $1, %dl
-; X32-NEXT:    kmovd %edx, %k6
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $6, %dl
-; X32-NEXT:    kmovd %edx, %k7
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrb $5, %al
+; X32-NEXT:    andb $1, %al
+; X32-NEXT:    kmovd %eax, %k6
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrb $6, %al
+; X32-NEXT:    kmovd %eax, %k7
 ; X32-NEXT:    shrb $7, %cl
 ; X32-NEXT:    kmovd %ecx, %k0
-; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    movl %ebx, %ecx
 ; X32-NEXT:    shrl $24, %ecx
 ; X32-NEXT:    kmovd %ecx, %k1
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    andb $2, %dl
-; X32-NEXT:    shrb %dl
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    andb $2, %al
+; X32-NEXT:    shrb %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k3
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrb $2, %al
+; X32-NEXT:    kmovd %eax, %k3
 ; X32-NEXT:    kshiftlq $63, %k5, %k5
 ; X32-NEXT:    kshiftrq $11, %k5, %k5
 ; X32-NEXT:    kxorq %k4, %k5, %k4
@@ -577,10 +572,10 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    kxorq %k7, %k4, %k6
 ; X32-NEXT:    shrb $3, %cl
 ; X32-NEXT:    kmovd %ecx, %k4
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrl $29, %ecx
-; X32-NEXT:    andb $1, %cl
-; X32-NEXT:    kmovd %ecx, %k7
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrl $29, %eax
+; X32-NEXT:    andb $1, %al
+; X32-NEXT:    kmovd %eax, %k7
 ; X32-NEXT:    kshiftlq $63, %k6, %k6
 ; X32-NEXT:    kshiftrq $9, %k6, %k6
 ; X32-NEXT:    kxorq %k5, %k6, %k5
@@ -591,12 +586,12 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    kxorq %k5, %k0, %k0
 ; X32-NEXT:    kshiftrq $56, %k0, %k5
 ; X32-NEXT:    kxorq %k1, %k5, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrl $28, %ecx
-; X32-NEXT:    kmovd %ecx, %k5
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrl $30, %ecx
-; X32-NEXT:    kmovd %ecx, %k6
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrl $28, %eax
+; X32-NEXT:    kmovd %eax, %k5
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrl $30, %eax
+; X32-NEXT:    kmovd %eax, %k6
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $7, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
@@ -627,8 +622,8 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $62, %k0, %k1
 ; X32-NEXT:    kxorq %k6, %k1, %k1
-; X32-NEXT:    shrl $31, %eax
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    shrl $31, %ebx
+; X32-NEXT:    kmovd %ebx, %k2
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $1, %k1, %k1
@@ -666,21 +661,21 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    .cfi_def_cfa_offset 12
 ; X32-NEXT:    .cfi_offset %esi, -12
 ; X32-NEXT:    .cfi_offset %ebx, -8
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    kmovd %ecx, %k0
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    kmovd %eax, %k0
 ; X32-NEXT:    kshiftrq $1, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    andb $2, %cl
+; X32-NEXT:    shrb %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $62, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $2, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movl %eax, %edx
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    andb $15, %cl
+; X32-NEXT:    movl %ecx, %edx
 ; X32-NEXT:    shrb $2, %dl
 ; X32-NEXT:    kmovd %edx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
@@ -688,127 +683,124 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    kshiftrq $61, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $3, %k0, %k1
-; X32-NEXT:    shrb $3, %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    shrb $3, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $60, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $4, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $4, %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $4, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $59, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $5, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $5, %al
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $5, %cl
+; X32-NEXT:    andb $1, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $58, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $6, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $6, %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $6, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $57, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $7, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $7, %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $7, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $56, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $8, %k0, %k1
-; X32-NEXT:    movb %ch, %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movb %ah, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $55, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $9, %k0, %k1
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    andb $2, %cl
+; X32-NEXT:    shrb %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $54, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $10, %k0, %k1
-; X32-NEXT:    movb %ch, %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movl %eax, %edx
+; X32-NEXT:    movb %ah, %cl
+; X32-NEXT:    movl %eax, %esi
+; X32-NEXT:    andb $15, %cl
+; X32-NEXT:    movl %ecx, %edx
 ; X32-NEXT:    shrb $2, %dl
 ; X32-NEXT:    kmovd %edx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
+; X32-NEXT:    shrb $3, %cl
+; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movzwl %si, %edx
+; X32-NEXT:    movl %edx, %ecx
+; X32-NEXT:    shrl $12, %ecx
+; X32-NEXT:    kmovd %ecx, %k3
+; X32-NEXT:    movl %esi, %ecx
+; X32-NEXT:    shrl $13, %ecx
+; X32-NEXT:    andb $1, %cl
+; X32-NEXT:    kmovd %ecx, %k4
+; X32-NEXT:    movl %edx, %ecx
+; X32-NEXT:    shrl $14, %ecx
+; X32-NEXT:    kmovd %ecx, %k5
+; X32-NEXT:    shrl $16, %eax
+; X32-NEXT:    movl %eax, %ebx
+; X32-NEXT:    andb $2, %bl
+; X32-NEXT:    shrb %bl
+; X32-NEXT:    kmovd %ebx, %k6
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    andb $15, %cl
+; X32-NEXT:    movl %ecx, %ebx
+; X32-NEXT:    shrb $2, %bl
+; X32-NEXT:    kmovd %ebx, %k7
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $53, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
-; X32-NEXT:    kshiftrq $11, %k0, %k1
-; X32-NEXT:    shrb $3, %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    kshiftrq $11, %k0, %k1
 ; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrl $12, %eax
-; X32-NEXT:    andl $15, %eax
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrl $13, %eax
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k3
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrl $14, %eax
-; X32-NEXT:    andl $3, %eax
-; X32-NEXT:    kmovd %eax, %k4
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrl $15, %eax
-; X32-NEXT:    andl $1, %eax
-; X32-NEXT:    kmovd %eax, %k5
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrl $16, %edx
-; X32-NEXT:    movl %edx, %eax
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k6
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    andb $15, %bl
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrb $2, %al
-; X32-NEXT:    kmovd %eax, %k7
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $52, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $12, %k0, %k1
-; X32-NEXT:    kxorq %k2, %k1, %k1
+; X32-NEXT:    kxorq %k3, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $51, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $13, %k0, %k1
-; X32-NEXT:    kxorq %k3, %k1, %k1
+; X32-NEXT:    kxorq %k4, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $50, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $14, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
+; X32-NEXT:    kxorq %k5, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $49, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $15, %k0, %k1
-; X32-NEXT:    kxorq %k5, %k1, %k1
+; X32-NEXT:    shrl $15, %edx
+; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $48, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $16, %k0, %k1
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $47, %k1, %k1
@@ -824,46 +816,46 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    kshiftrq $45, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $19, %k0, %k1
-; X32-NEXT:    shrb $3, %bl
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    shrb $3, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $44, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $20, %k0, %k1
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    shrb $4, %bl
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $4, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $43, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $21, %k0, %k1
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    shrb $5, %bl
-; X32-NEXT:    andb $1, %bl
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $5, %cl
+; X32-NEXT:    andb $1, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $42, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $22, %k0, %k1
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    shrb $6, %bl
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $6, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $41, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $23, %k0, %k1
-; X32-NEXT:    shrb $7, %dl
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    shrb $7, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $40, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $24, %k0, %k1
-; X32-NEXT:    movl %ecx, %edx
+; X32-NEXT:    movl %esi, %edx
 ; X32-NEXT:    shrl $24, %edx
 ; X32-NEXT:    kmovd %edx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
@@ -871,19 +863,19 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    kshiftrq $39, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $25, %k0, %k1
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    andb $2, %bl
-; X32-NEXT:    shrb %bl
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    movl %edx, %eax
+; X32-NEXT:    andb $2, %al
+; X32-NEXT:    shrb %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $38, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $26, %k0, %k1
 ; X32-NEXT:    andb $15, %dl
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    shrb $2, %bl
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    movl %edx, %eax
+; X32-NEXT:    shrb $2, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $37, %k1, %k1
@@ -896,26 +888,27 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    kshiftrq $36, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $28, %k0, %k1
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrl $28, %edx
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    movl %esi, %ecx
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrl $28, %eax
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $35, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $29, %k0, %k1
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrl $29, %edx
-; X32-NEXT:    andb $1, %dl
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrl $29, %eax
+; X32-NEXT:    andb $1, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $34, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $30, %k0, %k1
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrl $30, %edx
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrl $30, %eax
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $33, %k1, %k1
@@ -928,99 +921,99 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    kshiftrq $32, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $32, %k0, %k1
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    kmovd %ebx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $31, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $33, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andb $2, %cl
-; X32-NEXT:    shrb %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    andb $2, %al
+; X32-NEXT:    shrb %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $30, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $34, %k0, %k1
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    andb $15, %al
 ; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    shrb $2, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $29, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $35, %k0, %k1
-; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    shrb $3, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $28, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $36, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $4, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrb $4, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $27, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $37, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $5, %cl
-; X32-NEXT:    andb $1, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrb $5, %al
+; X32-NEXT:    andb $1, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $26, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $38, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $6, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrb $6, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $25, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $39, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $7, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrb $7, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $24, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $40, %k0, %k1
-; X32-NEXT:    movb %ah, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movb %bh, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    andb $2, %cl
-; X32-NEXT:    shrb %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    movb %ah, %cl
-; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k3
-; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k4
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrl $13, %ecx
-; X32-NEXT:    andb $1, %cl
-; X32-NEXT:    kmovd %ecx, %k5
+; X32-NEXT:    andb $2, %al
+; X32-NEXT:    shrb %al
+; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movb %bh, %al
+; X32-NEXT:    andb $15, %al
 ; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $2, %cl
+; X32-NEXT:    kmovd %ecx, %k3
+; X32-NEXT:    shrb $3, %al
+; X32-NEXT:    kmovd %eax, %k4
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrl $13, %eax
+; X32-NEXT:    andb $1, %al
+; X32-NEXT:    kmovd %eax, %k5
+; X32-NEXT:    movl %ebx, %ecx
 ; X32-NEXT:    shrl $16, %ecx
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    andb $2, %dl
-; X32-NEXT:    shrb %dl
-; X32-NEXT:    kmovd %edx, %k6
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    andb $2, %al
+; X32-NEXT:    shrb %al
+; X32-NEXT:    kmovd %eax, %k6
 ; X32-NEXT:    movl %ecx, %edx
 ; X32-NEXT:    andb $15, %dl
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    shrb $2, %bl
-; X32-NEXT:    kmovd %ebx, %k7
+; X32-NEXT:    movl %edx, %eax
+; X32-NEXT:    shrb $2, %al
+; X32-NEXT:    kmovd %eax, %k7
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $23, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
@@ -1031,23 +1024,20 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $42, %k0, %k1
 ; X32-NEXT:    kxorq %k3, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $21, %k1, %k1
-; X32-NEXT:    kxorq %k0, %k1, %k0
-; X32-NEXT:    kshiftrq $43, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
+; X32-NEXT:    movzwl %bx, %eax
 ; X32-NEXT:    movl %eax, %esi
 ; X32-NEXT:    shrl $12, %esi
-; X32-NEXT:    andl $15, %esi
 ; X32-NEXT:    kmovd %esi, %k2
 ; X32-NEXT:    movl %eax, %esi
 ; X32-NEXT:    shrl $14, %esi
-; X32-NEXT:    andl $3, %esi
 ; X32-NEXT:    kmovd %esi, %k3
-; X32-NEXT:    movl %eax, %esi
-; X32-NEXT:    shrl $15, %esi
-; X32-NEXT:    andl $1, %esi
-; X32-NEXT:    kmovd %esi, %k4
+; X32-NEXT:    kshiftlq $63, %k1, %k1
+; X32-NEXT:    kshiftrq $21, %k1, %k1
+; X32-NEXT:    kxorq %k0, %k1, %k0
+; X32-NEXT:    kshiftrq $43, %k0, %k1
+; X32-NEXT:    kxorq %k4, %k1, %k1
+; X32-NEXT:    shrl $15, %eax
+; X32-NEXT:    kmovd %eax, %k4
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $20, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
@@ -1095,30 +1085,30 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    kshiftrq $12, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k4
 ; X32-NEXT:    kshiftrq $52, %k4, %k0
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $4, %dl
-; X32-NEXT:    kmovd %edx, %k1
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrb $4, %al
+; X32-NEXT:    kmovd %eax, %k1
 ; X32-NEXT:    kxorq %k1, %k0, %k5
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $5, %dl
-; X32-NEXT:    andb $1, %dl
-; X32-NEXT:    kmovd %edx, %k6
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $6, %dl
-; X32-NEXT:    kmovd %edx, %k7
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrb $5, %al
+; X32-NEXT:    andb $1, %al
+; X32-NEXT:    kmovd %eax, %k6
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrb $6, %al
+; X32-NEXT:    kmovd %eax, %k7
 ; X32-NEXT:    shrb $7, %cl
 ; X32-NEXT:    kmovd %ecx, %k0
-; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    movl %ebx, %ecx
 ; X32-NEXT:    shrl $24, %ecx
 ; X32-NEXT:    kmovd %ecx, %k1
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    andb $2, %dl
-; X32-NEXT:    shrb %dl
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    andb $2, %al
+; X32-NEXT:    shrb %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k3
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrb $2, %al
+; X32-NEXT:    kmovd %eax, %k3
 ; X32-NEXT:    kshiftlq $63, %k5, %k5
 ; X32-NEXT:    kshiftrq $11, %k5, %k5
 ; X32-NEXT:    kxorq %k4, %k5, %k4
@@ -1131,10 +1121,10 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    kxorq %k7, %k4, %k6
 ; X32-NEXT:    shrb $3, %cl
 ; X32-NEXT:    kmovd %ecx, %k4
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrl $29, %ecx
-; X32-NEXT:    andb $1, %cl
-; X32-NEXT:    kmovd %ecx, %k7
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrl $29, %eax
+; X32-NEXT:    andb $1, %al
+; X32-NEXT:    kmovd %eax, %k7
 ; X32-NEXT:    kshiftlq $63, %k6, %k6
 ; X32-NEXT:    kshiftrq $9, %k6, %k6
 ; X32-NEXT:    kxorq %k5, %k6, %k5
@@ -1145,12 +1135,12 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    kxorq %k5, %k0, %k0
 ; X32-NEXT:    kshiftrq $56, %k0, %k5
 ; X32-NEXT:    kxorq %k1, %k5, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrl $28, %ecx
-; X32-NEXT:    kmovd %ecx, %k5
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrl $30, %ecx
-; X32-NEXT:    kmovd %ecx, %k6
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrl $28, %eax
+; X32-NEXT:    kmovd %eax, %k5
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrl $30, %eax
+; X32-NEXT:    kmovd %eax, %k6
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $7, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
@@ -1181,8 +1171,8 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $62, %k0, %k1
 ; X32-NEXT:    kxorq %k6, %k1, %k1
-; X32-NEXT:    shrl $31, %eax
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    shrl $31, %ebx
+; X32-NEXT:    kmovd %ebx, %k2
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $1, %k1, %k1
@@ -1701,21 +1691,21 @@ define i64 @test_mm512_mask_test_epi8_ma
 ; X32-NEXT:    subl $8, %esp
 ; X32-NEXT:    .cfi_offset %esi, -16
 ; X32-NEXT:    .cfi_offset %ebx, -12
-; X32-NEXT:    movl 8(%ebp), %ecx
-; X32-NEXT:    kmovd %ecx, %k0
+; X32-NEXT:    movl 8(%ebp), %eax
+; X32-NEXT:    kmovd %eax, %k0
 ; X32-NEXT:    kshiftrq $1, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    andb $2, %cl
+; X32-NEXT:    shrb %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $62, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $2, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movl %eax, %edx
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    andb $15, %cl
+; X32-NEXT:    movl %ecx, %edx
 ; X32-NEXT:    shrb $2, %dl
 ; X32-NEXT:    kmovd %edx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
@@ -1723,127 +1713,124 @@ define i64 @test_mm512_mask_test_epi8_ma
 ; X32-NEXT:    kshiftrq $61, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $3, %k0, %k1
-; X32-NEXT:    shrb $3, %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    shrb $3, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $60, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $4, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $4, %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $4, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $59, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $5, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $5, %al
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $5, %cl
+; X32-NEXT:    andb $1, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $58, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $6, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $6, %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $6, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $57, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $7, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $7, %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $7, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $56, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $8, %k0, %k1
-; X32-NEXT:    movb %ch, %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movb %ah, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    movb %ch, %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movl %eax, %edx
-; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k3
-; X32-NEXT:    shrb $3, %al
-; X32-NEXT:    kmovd %eax, %k4
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrl $13, %eax
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k5
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrl $16, %edx
-; X32-NEXT:    movl %edx, %eax
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k6
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    andb $15, %bl
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrb $2, %al
-; X32-NEXT:    kmovd %eax, %k7
-; X32-NEXT:    movl 12(%ebp), %eax
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $55, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $9, %k0, %k1
+; X32-NEXT:    andb $2, %cl
+; X32-NEXT:    shrb %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $54, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $10, %k0, %k1
-; X32-NEXT:    kxorq %k3, %k1, %k1
+; X32-NEXT:    movb %ah, %cl
+; X32-NEXT:    movl %eax, %esi
+; X32-NEXT:    andb $15, %cl
+; X32-NEXT:    movl %ecx, %edx
+; X32-NEXT:    shrb $2, %dl
+; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    kxorq %k2, %k1, %k1
+; X32-NEXT:    shrb $3, %cl
+; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movzwl %si, %edx
+; X32-NEXT:    movl %edx, %ecx
+; X32-NEXT:    shrl $12, %ecx
+; X32-NEXT:    kmovd %ecx, %k3
+; X32-NEXT:    movl %esi, %ecx
+; X32-NEXT:    shrl $13, %ecx
+; X32-NEXT:    andb $1, %cl
+; X32-NEXT:    kmovd %ecx, %k4
+; X32-NEXT:    movl %edx, %ecx
+; X32-NEXT:    shrl $14, %ecx
+; X32-NEXT:    kmovd %ecx, %k5
+; X32-NEXT:    shrl $16, %eax
+; X32-NEXT:    movl %eax, %ebx
+; X32-NEXT:    andb $2, %bl
+; X32-NEXT:    shrb %bl
+; X32-NEXT:    kmovd %ebx, %k6
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    andb $15, %cl
+; X32-NEXT:    movl %ecx, %ebx
+; X32-NEXT:    shrb $2, %bl
+; X32-NEXT:    kmovd %ebx, %k7
+; X32-NEXT:    movl 12(%ebp), %ebx
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $53, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $11, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
+; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $52, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $12, %k0, %k1
-; X32-NEXT:    movl %ecx, %esi
-; X32-NEXT:    shrl $12, %esi
-; X32-NEXT:    andl $15, %esi
-; X32-NEXT:    kmovd %esi, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
+; X32-NEXT:    kxorq %k3, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $51, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $13, %k0, %k1
-; X32-NEXT:    kxorq %k5, %k1, %k1
+; X32-NEXT:    kxorq %k4, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $50, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $14, %k0, %k1
-; X32-NEXT:    movl %ecx, %esi
-; X32-NEXT:    shrl $14, %esi
-; X32-NEXT:    andl $3, %esi
-; X32-NEXT:    kmovd %esi, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
+; X32-NEXT:    kxorq %k5, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $49, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $15, %k0, %k1
-; X32-NEXT:    movl %ecx, %esi
-; X32-NEXT:    shrl $15, %esi
-; X32-NEXT:    andl $1, %esi
-; X32-NEXT:    kmovd %esi, %k2
+; X32-NEXT:    shrl $15, %edx
+; X32-NEXT:    kmovd %edx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $48, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $16, %k0, %k1
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $47, %k1, %k1
@@ -1859,46 +1846,46 @@ define i64 @test_mm512_mask_test_epi8_ma
 ; X32-NEXT:    kshiftrq $45, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $19, %k0, %k1
-; X32-NEXT:    shrb $3, %bl
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    shrb $3, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $44, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $20, %k0, %k1
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    shrb $4, %bl
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $4, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $43, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $21, %k0, %k1
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    shrb $5, %bl
-; X32-NEXT:    andb $1, %bl
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $5, %cl
+; X32-NEXT:    andb $1, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $42, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $22, %k0, %k1
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    shrb $6, %bl
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $6, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $41, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $23, %k0, %k1
-; X32-NEXT:    shrb $7, %dl
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    shrb $7, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $40, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $24, %k0, %k1
-; X32-NEXT:    movl %ecx, %edx
+; X32-NEXT:    movl %esi, %edx
 ; X32-NEXT:    shrl $24, %edx
 ; X32-NEXT:    kmovd %edx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
@@ -1906,19 +1893,19 @@ define i64 @test_mm512_mask_test_epi8_ma
 ; X32-NEXT:    kshiftrq $39, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $25, %k0, %k1
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    andb $2, %bl
-; X32-NEXT:    shrb %bl
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    movl %edx, %eax
+; X32-NEXT:    andb $2, %al
+; X32-NEXT:    shrb %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $38, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $26, %k0, %k1
 ; X32-NEXT:    andb $15, %dl
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    shrb $2, %bl
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    movl %edx, %eax
+; X32-NEXT:    shrb $2, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $37, %k1, %k1
@@ -1931,26 +1918,27 @@ define i64 @test_mm512_mask_test_epi8_ma
 ; X32-NEXT:    kshiftrq $36, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $28, %k0, %k1
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrl $28, %edx
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    movl %esi, %ecx
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrl $28, %eax
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $35, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $29, %k0, %k1
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrl $29, %edx
-; X32-NEXT:    andb $1, %dl
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrl $29, %eax
+; X32-NEXT:    andb $1, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $34, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $30, %k0, %k1
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrl $30, %edx
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrl $30, %eax
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $33, %k1, %k1
@@ -1963,99 +1951,99 @@ define i64 @test_mm512_mask_test_epi8_ma
 ; X32-NEXT:    kshiftrq $32, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $32, %k0, %k1
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    kmovd %ebx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $31, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $33, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andb $2, %cl
-; X32-NEXT:    shrb %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    andb $2, %al
+; X32-NEXT:    shrb %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $30, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $34, %k0, %k1
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    andb $15, %al
 ; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    shrb $2, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $29, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $35, %k0, %k1
-; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    shrb $3, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $28, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $36, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $4, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrb $4, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $27, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $37, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $5, %cl
-; X32-NEXT:    andb $1, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrb $5, %al
+; X32-NEXT:    andb $1, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $26, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $38, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $6, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrb $6, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $25, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $39, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $7, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrb $7, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $24, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $40, %k0, %k1
-; X32-NEXT:    movb %ah, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movb %bh, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    andb $2, %cl
-; X32-NEXT:    shrb %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    movb %ah, %cl
-; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k3
-; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k4
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrl $13, %ecx
-; X32-NEXT:    andb $1, %cl
-; X32-NEXT:    kmovd %ecx, %k5
+; X32-NEXT:    andb $2, %al
+; X32-NEXT:    shrb %al
+; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movb %bh, %al
+; X32-NEXT:    andb $15, %al
 ; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $2, %cl
+; X32-NEXT:    kmovd %ecx, %k3
+; X32-NEXT:    shrb $3, %al
+; X32-NEXT:    kmovd %eax, %k4
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrl $13, %eax
+; X32-NEXT:    andb $1, %al
+; X32-NEXT:    kmovd %eax, %k5
+; X32-NEXT:    movl %ebx, %ecx
 ; X32-NEXT:    shrl $16, %ecx
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    andb $2, %dl
-; X32-NEXT:    shrb %dl
-; X32-NEXT:    kmovd %edx, %k6
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    andb $2, %al
+; X32-NEXT:    shrb %al
+; X32-NEXT:    kmovd %eax, %k6
 ; X32-NEXT:    movl %ecx, %edx
 ; X32-NEXT:    andb $15, %dl
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    shrb $2, %bl
-; X32-NEXT:    kmovd %ebx, %k7
+; X32-NEXT:    movl %edx, %eax
+; X32-NEXT:    shrb $2, %al
+; X32-NEXT:    kmovd %eax, %k7
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $23, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
@@ -2075,9 +2063,9 @@ define i64 @test_mm512_mask_test_epi8_ma
 ; X32-NEXT:    kshiftrq $20, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $44, %k0, %k1
+; X32-NEXT:    movzwl %bx, %eax
 ; X32-NEXT:    movl %eax, %esi
 ; X32-NEXT:    shrl $12, %esi
-; X32-NEXT:    andl $15, %esi
 ; X32-NEXT:    kmovd %esi, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
@@ -2091,17 +2079,14 @@ define i64 @test_mm512_mask_test_epi8_ma
 ; X32-NEXT:    kshiftrq $46, %k0, %k1
 ; X32-NEXT:    movl %eax, %esi
 ; X32-NEXT:    shrl $14, %esi
-; X32-NEXT:    andl $3, %esi
 ; X32-NEXT:    kmovd %esi, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $17, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $47, %k0, %k1
-; X32-NEXT:    movl %eax, %esi
-; X32-NEXT:    shrl $15, %esi
-; X32-NEXT:    andl $1, %esi
-; X32-NEXT:    kmovd %esi, %k2
+; X32-NEXT:    shrl $15, %eax
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $16, %k1, %k1
@@ -2130,30 +2115,30 @@ define i64 @test_mm512_mask_test_epi8_ma
 ; X32-NEXT:    kshiftrq $12, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $52, %k0, %k1
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $4, %dl
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrb $4, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $5, %dl
-; X32-NEXT:    andb $1, %dl
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $6, %dl
-; X32-NEXT:    kmovd %edx, %k3
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrb $5, %al
+; X32-NEXT:    andb $1, %al
+; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrb $6, %al
+; X32-NEXT:    kmovd %eax, %k3
 ; X32-NEXT:    shrb $7, %cl
 ; X32-NEXT:    kmovd %ecx, %k4
-; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    movl %ebx, %ecx
 ; X32-NEXT:    shrl $24, %ecx
 ; X32-NEXT:    kmovd %ecx, %k5
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    andb $2, %dl
-; X32-NEXT:    shrb %dl
-; X32-NEXT:    kmovd %edx, %k6
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    andb $2, %al
+; X32-NEXT:    shrb %al
+; X32-NEXT:    kmovd %eax, %k6
 ; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k7
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrb $2, %al
+; X32-NEXT:    kmovd %eax, %k7
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $11, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
@@ -2195,34 +2180,34 @@ define i64 @test_mm512_mask_test_epi8_ma
 ; X32-NEXT:    kshiftrq $4, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $60, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrl $28, %ecx
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrl $28, %eax
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $3, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $61, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrl $29, %ecx
-; X32-NEXT:    andb $1, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrl $29, %eax
+; X32-NEXT:    andb $1, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $2, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $62, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrl $30, %ecx
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrl $30, %eax
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $1, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftlq $1, %k0, %k0
 ; X32-NEXT:    kshiftrq $1, %k0, %k0
-; X32-NEXT:    shrl $31, %eax
-; X32-NEXT:    kmovd %eax, %k1
+; X32-NEXT:    shrl $31, %ebx
+; X32-NEXT:    kmovd %ebx, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    korq %k1, %k0, %k1
 ; X32-NEXT:    vptestmb %zmm0, %zmm1, %k0 {%k1}
@@ -2348,21 +2333,21 @@ define i64 @test_mm512_mask_testn_epi8_m
 ; X32-NEXT:    subl $8, %esp
 ; X32-NEXT:    .cfi_offset %esi, -16
 ; X32-NEXT:    .cfi_offset %ebx, -12
-; X32-NEXT:    movl 8(%ebp), %ecx
-; X32-NEXT:    kmovd %ecx, %k0
+; X32-NEXT:    movl 8(%ebp), %eax
+; X32-NEXT:    kmovd %eax, %k0
 ; X32-NEXT:    kshiftrq $1, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    andb $2, %cl
+; X32-NEXT:    shrb %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $62, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $2, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movl %eax, %edx
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    andb $15, %cl
+; X32-NEXT:    movl %ecx, %edx
 ; X32-NEXT:    shrb $2, %dl
 ; X32-NEXT:    kmovd %edx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
@@ -2370,127 +2355,124 @@ define i64 @test_mm512_mask_testn_epi8_m
 ; X32-NEXT:    kshiftrq $61, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $3, %k0, %k1
-; X32-NEXT:    shrb $3, %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    shrb $3, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $60, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $4, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $4, %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $4, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $59, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $5, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $5, %al
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $5, %cl
+; X32-NEXT:    andb $1, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $58, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $6, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $6, %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $6, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $57, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $7, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $7, %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $7, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $56, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $8, %k0, %k1
-; X32-NEXT:    movb %ch, %al
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movb %ah, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    movb %ch, %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movl %eax, %edx
-; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k3
-; X32-NEXT:    shrb $3, %al
-; X32-NEXT:    kmovd %eax, %k4
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrl $13, %eax
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k5
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrl $16, %edx
-; X32-NEXT:    movl %edx, %eax
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k6
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    andb $15, %bl
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrb $2, %al
-; X32-NEXT:    kmovd %eax, %k7
-; X32-NEXT:    movl 12(%ebp), %eax
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $55, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $9, %k0, %k1
+; X32-NEXT:    andb $2, %cl
+; X32-NEXT:    shrb %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $54, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $10, %k0, %k1
-; X32-NEXT:    kxorq %k3, %k1, %k1
+; X32-NEXT:    movb %ah, %cl
+; X32-NEXT:    movl %eax, %esi
+; X32-NEXT:    andb $15, %cl
+; X32-NEXT:    movl %ecx, %edx
+; X32-NEXT:    shrb $2, %dl
+; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    kxorq %k2, %k1, %k1
+; X32-NEXT:    shrb $3, %cl
+; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movzwl %si, %edx
+; X32-NEXT:    movl %edx, %ecx
+; X32-NEXT:    shrl $12, %ecx
+; X32-NEXT:    kmovd %ecx, %k3
+; X32-NEXT:    movl %esi, %ecx
+; X32-NEXT:    shrl $13, %ecx
+; X32-NEXT:    andb $1, %cl
+; X32-NEXT:    kmovd %ecx, %k4
+; X32-NEXT:    movl %edx, %ecx
+; X32-NEXT:    shrl $14, %ecx
+; X32-NEXT:    kmovd %ecx, %k5
+; X32-NEXT:    shrl $16, %eax
+; X32-NEXT:    movl %eax, %ebx
+; X32-NEXT:    andb $2, %bl
+; X32-NEXT:    shrb %bl
+; X32-NEXT:    kmovd %ebx, %k6
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    andb $15, %cl
+; X32-NEXT:    movl %ecx, %ebx
+; X32-NEXT:    shrb $2, %bl
+; X32-NEXT:    kmovd %ebx, %k7
+; X32-NEXT:    movl 12(%ebp), %ebx
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $53, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $11, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
+; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $52, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $12, %k0, %k1
-; X32-NEXT:    movl %ecx, %esi
-; X32-NEXT:    shrl $12, %esi
-; X32-NEXT:    andl $15, %esi
-; X32-NEXT:    kmovd %esi, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
+; X32-NEXT:    kxorq %k3, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $51, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $13, %k0, %k1
-; X32-NEXT:    kxorq %k5, %k1, %k1
+; X32-NEXT:    kxorq %k4, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $50, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $14, %k0, %k1
-; X32-NEXT:    movl %ecx, %esi
-; X32-NEXT:    shrl $14, %esi
-; X32-NEXT:    andl $3, %esi
-; X32-NEXT:    kmovd %esi, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
+; X32-NEXT:    kxorq %k5, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $49, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $15, %k0, %k1
-; X32-NEXT:    movl %ecx, %esi
-; X32-NEXT:    shrl $15, %esi
-; X32-NEXT:    andl $1, %esi
-; X32-NEXT:    kmovd %esi, %k2
+; X32-NEXT:    shrl $15, %edx
+; X32-NEXT:    kmovd %edx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $48, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $16, %k0, %k1
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $47, %k1, %k1
@@ -2506,46 +2488,46 @@ define i64 @test_mm512_mask_testn_epi8_m
 ; X32-NEXT:    kshiftrq $45, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $19, %k0, %k1
-; X32-NEXT:    shrb $3, %bl
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    shrb $3, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $44, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $20, %k0, %k1
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    shrb $4, %bl
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $4, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $43, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $21, %k0, %k1
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    shrb $5, %bl
-; X32-NEXT:    andb $1, %bl
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $5, %cl
+; X32-NEXT:    andb $1, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $42, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $22, %k0, %k1
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    shrb $6, %bl
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $6, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $41, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $23, %k0, %k1
-; X32-NEXT:    shrb $7, %dl
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    shrb $7, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $40, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $24, %k0, %k1
-; X32-NEXT:    movl %ecx, %edx
+; X32-NEXT:    movl %esi, %edx
 ; X32-NEXT:    shrl $24, %edx
 ; X32-NEXT:    kmovd %edx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
@@ -2553,19 +2535,19 @@ define i64 @test_mm512_mask_testn_epi8_m
 ; X32-NEXT:    kshiftrq $39, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $25, %k0, %k1
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    andb $2, %bl
-; X32-NEXT:    shrb %bl
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    movl %edx, %eax
+; X32-NEXT:    andb $2, %al
+; X32-NEXT:    shrb %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $38, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $26, %k0, %k1
 ; X32-NEXT:    andb $15, %dl
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    shrb $2, %bl
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    movl %edx, %eax
+; X32-NEXT:    shrb $2, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $37, %k1, %k1
@@ -2578,26 +2560,27 @@ define i64 @test_mm512_mask_testn_epi8_m
 ; X32-NEXT:    kshiftrq $36, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $28, %k0, %k1
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrl $28, %edx
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    movl %esi, %ecx
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrl $28, %eax
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $35, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $29, %k0, %k1
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrl $29, %edx
-; X32-NEXT:    andb $1, %dl
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrl $29, %eax
+; X32-NEXT:    andb $1, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $34, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $30, %k0, %k1
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrl $30, %edx
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrl $30, %eax
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $33, %k1, %k1
@@ -2610,99 +2593,99 @@ define i64 @test_mm512_mask_testn_epi8_m
 ; X32-NEXT:    kshiftrq $32, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $32, %k0, %k1
-; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    kmovd %ebx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $31, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $33, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andb $2, %cl
-; X32-NEXT:    shrb %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    andb $2, %al
+; X32-NEXT:    shrb %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $30, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $34, %k0, %k1
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    andb $15, %al
 ; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    shrb $2, %cl
+; X32-NEXT:    kmovd %ecx, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $29, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $35, %k0, %k1
-; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    shrb $3, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $28, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $36, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $4, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrb $4, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $27, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $37, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $5, %cl
-; X32-NEXT:    andb $1, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrb $5, %al
+; X32-NEXT:    andb $1, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $26, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $38, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $6, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrb $6, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $25, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $39, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $7, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrb $7, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $24, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $40, %k0, %k1
-; X32-NEXT:    movb %ah, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movb %bh, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    andb $2, %cl
-; X32-NEXT:    shrb %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    movb %ah, %cl
-; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k3
-; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k4
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrl $13, %ecx
-; X32-NEXT:    andb $1, %cl
-; X32-NEXT:    kmovd %ecx, %k5
+; X32-NEXT:    andb $2, %al
+; X32-NEXT:    shrb %al
+; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movb %bh, %al
+; X32-NEXT:    andb $15, %al
 ; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    shrb $2, %cl
+; X32-NEXT:    kmovd %ecx, %k3
+; X32-NEXT:    shrb $3, %al
+; X32-NEXT:    kmovd %eax, %k4
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrl $13, %eax
+; X32-NEXT:    andb $1, %al
+; X32-NEXT:    kmovd %eax, %k5
+; X32-NEXT:    movl %ebx, %ecx
 ; X32-NEXT:    shrl $16, %ecx
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    andb $2, %dl
-; X32-NEXT:    shrb %dl
-; X32-NEXT:    kmovd %edx, %k6
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    andb $2, %al
+; X32-NEXT:    shrb %al
+; X32-NEXT:    kmovd %eax, %k6
 ; X32-NEXT:    movl %ecx, %edx
 ; X32-NEXT:    andb $15, %dl
-; X32-NEXT:    movl %edx, %ebx
-; X32-NEXT:    shrb $2, %bl
-; X32-NEXT:    kmovd %ebx, %k7
+; X32-NEXT:    movl %edx, %eax
+; X32-NEXT:    shrb $2, %al
+; X32-NEXT:    kmovd %eax, %k7
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $23, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
@@ -2722,9 +2705,9 @@ define i64 @test_mm512_mask_testn_epi8_m
 ; X32-NEXT:    kshiftrq $20, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $44, %k0, %k1
+; X32-NEXT:    movzwl %bx, %eax
 ; X32-NEXT:    movl %eax, %esi
 ; X32-NEXT:    shrl $12, %esi
-; X32-NEXT:    andl $15, %esi
 ; X32-NEXT:    kmovd %esi, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
@@ -2738,17 +2721,14 @@ define i64 @test_mm512_mask_testn_epi8_m
 ; X32-NEXT:    kshiftrq $46, %k0, %k1
 ; X32-NEXT:    movl %eax, %esi
 ; X32-NEXT:    shrl $14, %esi
-; X32-NEXT:    andl $3, %esi
 ; X32-NEXT:    kmovd %esi, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $17, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $47, %k0, %k1
-; X32-NEXT:    movl %eax, %esi
-; X32-NEXT:    shrl $15, %esi
-; X32-NEXT:    andl $1, %esi
-; X32-NEXT:    kmovd %esi, %k2
+; X32-NEXT:    shrl $15, %eax
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $16, %k1, %k1
@@ -2777,30 +2757,30 @@ define i64 @test_mm512_mask_testn_epi8_m
 ; X32-NEXT:    kshiftrq $12, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $52, %k0, %k1
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $4, %dl
-; X32-NEXT:    kmovd %edx, %k2
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrb $4, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $5, %dl
-; X32-NEXT:    andb $1, %dl
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $6, %dl
-; X32-NEXT:    kmovd %edx, %k3
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrb $5, %al
+; X32-NEXT:    andb $1, %al
+; X32-NEXT:    kmovd %eax, %k2
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrb $6, %al
+; X32-NEXT:    kmovd %eax, %k3
 ; X32-NEXT:    shrb $7, %cl
 ; X32-NEXT:    kmovd %ecx, %k4
-; X32-NEXT:    movl %eax, %ecx
+; X32-NEXT:    movl %ebx, %ecx
 ; X32-NEXT:    shrl $24, %ecx
 ; X32-NEXT:    kmovd %ecx, %k5
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    andb $2, %dl
-; X32-NEXT:    shrb %dl
-; X32-NEXT:    kmovd %edx, %k6
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    andb $2, %al
+; X32-NEXT:    shrb %al
+; X32-NEXT:    kmovd %eax, %k6
 ; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k7
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    shrb $2, %al
+; X32-NEXT:    kmovd %eax, %k7
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $11, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
@@ -2842,34 +2822,34 @@ define i64 @test_mm512_mask_testn_epi8_m
 ; X32-NEXT:    kshiftrq $4, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $60, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrl $28, %ecx
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrl $28, %eax
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $3, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $61, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrl $29, %ecx
-; X32-NEXT:    andb $1, %cl
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrl $29, %eax
+; X32-NEXT:    andb $1, %al
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $2, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftrq $62, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrl $30, %ecx
-; X32-NEXT:    kmovd %ecx, %k2
+; X32-NEXT:    movl %ebx, %eax
+; X32-NEXT:    shrl $30, %eax
+; X32-NEXT:    kmovd %eax, %k2
 ; X32-NEXT:    kxorq %k2, %k1, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    kshiftrq $1, %k1, %k1
 ; X32-NEXT:    kxorq %k0, %k1, %k0
 ; X32-NEXT:    kshiftlq $1, %k0, %k0
 ; X32-NEXT:    kshiftrq $1, %k0, %k0
-; X32-NEXT:    shrl $31, %eax
-; X32-NEXT:    kmovd %eax, %k1
+; X32-NEXT:    shrl $31, %ebx
+; X32-NEXT:    kmovd %ebx, %k1
 ; X32-NEXT:    kshiftlq $63, %k1, %k1
 ; X32-NEXT:    korq %k1, %k0, %k1
 ; X32-NEXT:    vptestnmb %zmm0, %zmm1, %k0 {%k1}

Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll?rev=323175&r1=323174&r2=323175&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll Mon Jan 22 21:45:52 2018
@@ -1797,14 +1797,20 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ;
 ; AVX512F-32-LABEL: test_mask_cmp_b_512:
 ; AVX512F-32:       # %bb.0:
-; AVX512F-32-NEXT:    pushl %ebx
+; AVX512F-32-NEXT:    pushl %ebp
 ; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
-; AVX512F-32-NEXT:    pushl %esi
+; AVX512F-32-NEXT:    pushl %ebx
 ; AVX512F-32-NEXT:    .cfi_def_cfa_offset 12
+; AVX512F-32-NEXT:    pushl %edi
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 16
+; AVX512F-32-NEXT:    pushl %esi
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 20
 ; AVX512F-32-NEXT:    subl $60, %esp
-; AVX512F-32-NEXT:    .cfi_def_cfa_offset 72
-; AVX512F-32-NEXT:    .cfi_offset %esi, -12
-; AVX512F-32-NEXT:    .cfi_offset %ebx, -8
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 80
+; AVX512F-32-NEXT:    .cfi_offset %esi, -20
+; AVX512F-32-NEXT:    .cfi_offset %edi, -16
+; AVX512F-32-NEXT:    .cfi_offset %ebx, -12
+; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
 ; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrl $16, %eax
@@ -1844,7 +1850,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    kmovd %ecx, %k5
 ; AVX512F-32-NEXT:    movb %bh, %cl
 ; AVX512F-32-NEXT:    kmovd %ecx, %k1
-; AVX512F-32-NEXT:    movl %ebx, %esi
+; AVX512F-32-NEXT:    movl %ebx, %ebp
 ; AVX512F-32-NEXT:    andb $2, %cl
 ; AVX512F-32-NEXT:    shrb %cl
 ; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
@@ -1870,7 +1876,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    kshiftrq $5, %k7, %k0
 ; AVX512F-32-NEXT:    kxorq %k4, %k0, %k4
 ; AVX512F-32-NEXT:    kmovd %ecx, %k0
-; AVX512F-32-NEXT:    movl %esi, %ecx
+; AVX512F-32-NEXT:    movl %ebp, %ecx
 ; AVX512F-32-NEXT:    shrl $13, %ecx
 ; AVX512F-32-NEXT:    andb $1, %cl
 ; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
@@ -1915,9 +1921,10 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    movl %eax, %ecx
 ; AVX512F-32-NEXT:    shrb $5, %cl
 ; AVX512F-32-NEXT:    andb $1, %cl
-; AVX512F-32-NEXT:    movl %esi, %edx
+; AVX512F-32-NEXT:    movzwl %bp, %edx
+; AVX512F-32-NEXT:    movl %edx, %esi
+; AVX512F-32-NEXT:    movl %edx, %edi
 ; AVX512F-32-NEXT:    shrl $12, %edx
-; AVX512F-32-NEXT:    andl $15, %edx
 ; AVX512F-32-NEXT:    kshiftlq $63, %k3, %k3
 ; AVX512F-32-NEXT:    kshiftrq $53, %k3, %k3
 ; AVX512F-32-NEXT:    kxorq %k7, %k3, %k3
@@ -1932,28 +1939,24 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    kmovd %ecx, %k0
 ; AVX512F-32-NEXT:    movl %eax, %edx
 ; AVX512F-32-NEXT:    shrb $6, %dl
-; AVX512F-32-NEXT:    movl %esi, %ecx
-; AVX512F-32-NEXT:    shrl $14, %ecx
-; AVX512F-32-NEXT:    andl $3, %ecx
+; AVX512F-32-NEXT:    shrl $15, %esi
+; AVX512F-32-NEXT:    shrl $14, %edi
 ; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
 ; AVX512F-32-NEXT:    kshiftrq $51, %k7, %k7
 ; AVX512F-32-NEXT:    kxorq %k3, %k7, %k3
 ; AVX512F-32-NEXT:    kshiftrq $13, %k3, %k7
 ; AVX512F-32-NEXT:    kxorq %k4, %k7, %k4
-; AVX512F-32-NEXT:    kmovd %ecx, %k7
-; AVX512F-32-NEXT:    movl %esi, %ecx
-; AVX512F-32-NEXT:    shrl $15, %ecx
-; AVX512F-32-NEXT:    andl $1, %ecx
 ; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
 ; AVX512F-32-NEXT:    kshiftrq $50, %k4, %k4
 ; AVX512F-32-NEXT:    kxorq %k3, %k4, %k3
 ; AVX512F-32-NEXT:    kshiftrq $14, %k3, %k4
+; AVX512F-32-NEXT:    kmovd %edi, %k7
 ; AVX512F-32-NEXT:    kxorq %k7, %k4, %k4
 ; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
 ; AVX512F-32-NEXT:    kshiftrq $49, %k4, %k4
 ; AVX512F-32-NEXT:    kxorq %k3, %k4, %k3
 ; AVX512F-32-NEXT:    kshiftrq $15, %k3, %k4
-; AVX512F-32-NEXT:    kmovd %ecx, %k7
+; AVX512F-32-NEXT:    kmovd %esi, %k7
 ; AVX512F-32-NEXT:    kxorq %k7, %k4, %k4
 ; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
 ; AVX512F-32-NEXT:    kshiftrq $48, %k4, %k4
@@ -1962,7 +1965,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    kmovd %eax, %k7
 ; AVX512F-32-NEXT:    kxorq %k7, %k4, %k4
 ; AVX512F-32-NEXT:    kmovd %edx, %k7
-; AVX512F-32-NEXT:    movl %esi, %edx
+; AVX512F-32-NEXT:    movl %ebp, %edx
 ; AVX512F-32-NEXT:    shrl $24, %edx
 ; AVX512F-32-NEXT:    # kill: def %al killed %al killed %eax def %eax
 ; AVX512F-32-NEXT:    shrb $7, %al
@@ -2002,19 +2005,19 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    shrb $3, %cl
 ; AVX512F-32-NEXT:    kshiftlq $63, %k6, %k6
 ; AVX512F-32-NEXT:    kshiftrq $43, %k6, %k6
-; AVX512F-32-NEXT:    kxorq %k1, %k6, %k1
-; AVX512F-32-NEXT:    kshiftrq $21, %k1, %k6
-; AVX512F-32-NEXT:    kxorq %k0, %k6, %k6
-; AVX512F-32-NEXT:    kmovd %ecx, %k0
-; AVX512F-32-NEXT:    movl %esi, %ecx
-; AVX512F-32-NEXT:    shrl $29, %ecx
-; AVX512F-32-NEXT:    andb $1, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $42, %k6, %k6
 ; AVX512F-32-NEXT:    kxorq %k1, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $22, %k6, %k1
-; AVX512F-32-NEXT:    kxorq %k7, %k1, %k7
+; AVX512F-32-NEXT:    kshiftrq $21, %k6, %k1
+; AVX512F-32-NEXT:    kxorq %k0, %k1, %k0
 ; AVX512F-32-NEXT:    kmovd %ecx, %k1
+; AVX512F-32-NEXT:    movl %ebp, %ecx
+; AVX512F-32-NEXT:    shrl $29, %ecx
+; AVX512F-32-NEXT:    andb $1, %cl
+; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
+; AVX512F-32-NEXT:    kshiftrq $42, %k0, %k0
+; AVX512F-32-NEXT:    kxorq %k6, %k0, %k6
+; AVX512F-32-NEXT:    kshiftrq $22, %k6, %k0
+; AVX512F-32-NEXT:    kxorq %k7, %k0, %k7
+; AVX512F-32-NEXT:    kmovd %ecx, %k0
 ; AVX512F-32-NEXT:    movl %ebx, %edx
 ; AVX512F-32-NEXT:    andb $2, %dl
 ; AVX512F-32-NEXT:    shrb %dl
@@ -2045,68 +2048,68 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    shrb $4, %cl
 ; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
 ; AVX512F-32-NEXT:    kshiftrq $38, %k4, %k4
-; AVX512F-32-NEXT:    kxorq %k7, %k4, %k7
-; AVX512F-32-NEXT:    kshiftrq $26, %k7, %k4
-; AVX512F-32-NEXT:    kxorq %k2, %k4, %k2
-; AVX512F-32-NEXT:    kmovd %ecx, %k4
+; AVX512F-32-NEXT:    kxorq %k7, %k4, %k4
+; AVX512F-32-NEXT:    kshiftrq $26, %k4, %k7
+; AVX512F-32-NEXT:    kxorq %k2, %k7, %k7
+; AVX512F-32-NEXT:    kmovd %ecx, %k2
 ; AVX512F-32-NEXT:    movl %ebx, %ecx
 ; AVX512F-32-NEXT:    shrb $5, %cl
 ; AVX512F-32-NEXT:    andb $1, %cl
-; AVX512F-32-NEXT:    movl %esi, %edx
+; AVX512F-32-NEXT:    movl %ebp, %edx
 ; AVX512F-32-NEXT:    shrl $28, %edx
-; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $37, %k2, %k2
-; AVX512F-32-NEXT:    kxorq %k7, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $27, %k2, %k7
-; AVX512F-32-NEXT:    kxorq %k0, %k7, %k0
-; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $36, %k0, %k0
-; AVX512F-32-NEXT:    kxorq %k2, %k0, %k2
-; AVX512F-32-NEXT:    kshiftrq $28, %k2, %k0
+; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT:    kshiftrq $37, %k7, %k7
+; AVX512F-32-NEXT:    kxorq %k4, %k7, %k4
+; AVX512F-32-NEXT:    kshiftrq $27, %k4, %k7
+; AVX512F-32-NEXT:    kxorq %k1, %k7, %k1
+; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT:    kshiftrq $36, %k1, %k1
+; AVX512F-32-NEXT:    kxorq %k4, %k1, %k1
+; AVX512F-32-NEXT:    kshiftrq $28, %k1, %k4
 ; AVX512F-32-NEXT:    kmovd %edx, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k0, %k7
-; AVX512F-32-NEXT:    kmovd %ecx, %k0
+; AVX512F-32-NEXT:    kxorq %k7, %k4, %k7
+; AVX512F-32-NEXT:    kmovd %ecx, %k4
 ; AVX512F-32-NEXT:    movl %ebx, %edx
 ; AVX512F-32-NEXT:    shrb $6, %dl
-; AVX512F-32-NEXT:    movl %esi, %ecx
-; AVX512F-32-NEXT:    shrl $30, %ecx
+; AVX512F-32-NEXT:    movl %ebp, %ecx
+; AVX512F-32-NEXT:    shrl $31, %ecx
+; AVX512F-32-NEXT:    movl %ebp, %esi
+; AVX512F-32-NEXT:    shrl $30, %esi
 ; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
 ; AVX512F-32-NEXT:    kshiftrq $35, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k2, %k7, %k2
-; AVX512F-32-NEXT:    kshiftrq $29, %k2, %k7
 ; AVX512F-32-NEXT:    kxorq %k1, %k7, %k1
-; AVX512F-32-NEXT:    kmovd %ecx, %k7
-; AVX512F-32-NEXT:    movl %esi, %ecx
-; AVX512F-32-NEXT:    shrl $31, %ecx
+; AVX512F-32-NEXT:    kshiftrq $29, %k1, %k7
+; AVX512F-32-NEXT:    kxorq %k0, %k7, %k0
+; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
+; AVX512F-32-NEXT:    kshiftrq $34, %k0, %k0
+; AVX512F-32-NEXT:    kxorq %k1, %k0, %k0
+; AVX512F-32-NEXT:    kshiftrq $30, %k0, %k1
+; AVX512F-32-NEXT:    kmovd %esi, %k7
+; AVX512F-32-NEXT:    kxorq %k7, %k1, %k1
 ; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $34, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k2, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $30, %k1, %k2
-; AVX512F-32-NEXT:    kxorq %k7, %k2, %k2
-; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $33, %k2, %k2
-; AVX512F-32-NEXT:    kxorq %k1, %k2, %k1
-; AVX512F-32-NEXT:    kshiftrq $31, %k1, %k2
+; AVX512F-32-NEXT:    kshiftrq $33, %k1, %k1
+; AVX512F-32-NEXT:    kxorq %k0, %k1, %k0
+; AVX512F-32-NEXT:    kshiftrq $31, %k0, %k1
 ; AVX512F-32-NEXT:    kmovd %ecx, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k2, %k2
-; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $32, %k2, %k2
-; AVX512F-32-NEXT:    kxorq %k1, %k2, %k1
-; AVX512F-32-NEXT:    kshiftrq $32, %k1, %k2
+; AVX512F-32-NEXT:    kxorq %k7, %k1, %k1
+; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT:    kshiftrq $32, %k1, %k1
+; AVX512F-32-NEXT:    kxorq %k0, %k1, %k0
+; AVX512F-32-NEXT:    kshiftrq $32, %k0, %k1
 ; AVX512F-32-NEXT:    kmovd %ebx, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k2, %k2
+; AVX512F-32-NEXT:    kxorq %k7, %k1, %k1
 ; AVX512F-32-NEXT:    kmovd %edx, %k7
 ; AVX512F-32-NEXT:    movl %ebx, %ecx
 ; AVX512F-32-NEXT:    shrb $7, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $31, %k2, %k2
-; AVX512F-32-NEXT:    kxorq %k1, %k2, %k1
-; AVX512F-32-NEXT:    kshiftrq $33, %k1, %k2
-; AVX512F-32-NEXT:    kxorq %k5, %k2, %k2
-; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $30, %k2, %k2
-; AVX512F-32-NEXT:    kxorq %k1, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $34, %k2, %k1
+; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT:    kshiftrq $31, %k1, %k1
+; AVX512F-32-NEXT:    kxorq %k0, %k1, %k0
+; AVX512F-32-NEXT:    kshiftrq $33, %k0, %k1
+; AVX512F-32-NEXT:    kxorq %k5, %k1, %k1
+; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT:    kshiftrq $30, %k1, %k1
+; AVX512F-32-NEXT:    kxorq %k0, %k1, %k0
+; AVX512F-32-NEXT:    kshiftrq $34, %k0, %k1
 ; AVX512F-32-NEXT:    kxorq %k6, %k1, %k5
 ; AVX512F-32-NEXT:    kmovd %ecx, %k6
 ; AVX512F-32-NEXT:    movb %bh, %cl
@@ -2115,34 +2118,34 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    shrb %cl
 ; AVX512F-32-NEXT:    kshiftlq $63, %k5, %k5
 ; AVX512F-32-NEXT:    kshiftrq $29, %k5, %k5
-; AVX512F-32-NEXT:    kxorq %k2, %k5, %k5
-; AVX512F-32-NEXT:    kshiftrq $35, %k5, %k2
-; AVX512F-32-NEXT:    kxorq %k3, %k2, %k3
-; AVX512F-32-NEXT:    kmovd %ecx, %k2
+; AVX512F-32-NEXT:    kxorq %k0, %k5, %k5
+; AVX512F-32-NEXT:    kshiftrq $35, %k5, %k0
+; AVX512F-32-NEXT:    kxorq %k3, %k0, %k3
+; AVX512F-32-NEXT:    kmovd %ecx, %k0
 ; AVX512F-32-NEXT:    movl %eax, %ecx
 ; AVX512F-32-NEXT:    shrb $2, %al
 ; AVX512F-32-NEXT:    kshiftlq $63, %k3, %k3
 ; AVX512F-32-NEXT:    kshiftrq $28, %k3, %k3
 ; AVX512F-32-NEXT:    kxorq %k5, %k3, %k5
 ; AVX512F-32-NEXT:    kshiftrq $36, %k5, %k3
-; AVX512F-32-NEXT:    kxorq %k4, %k3, %k4
+; AVX512F-32-NEXT:    kxorq %k2, %k3, %k2
 ; AVX512F-32-NEXT:    kmovd %eax, %k3
 ; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrl $16, %eax
 ; AVX512F-32-NEXT:    shrb $3, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
-; AVX512F-32-NEXT:    kshiftrq $27, %k4, %k4
-; AVX512F-32-NEXT:    kxorq %k5, %k4, %k5
-; AVX512F-32-NEXT:    kshiftrq $37, %k5, %k4
-; AVX512F-32-NEXT:    kxorq %k0, %k4, %k0
+; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
+; AVX512F-32-NEXT:    kshiftrq $27, %k2, %k2
+; AVX512F-32-NEXT:    kxorq %k5, %k2, %k2
+; AVX512F-32-NEXT:    kshiftrq $37, %k2, %k5
+; AVX512F-32-NEXT:    kxorq %k4, %k5, %k5
 ; AVX512F-32-NEXT:    kmovd %ecx, %k4
 ; AVX512F-32-NEXT:    movl %ebx, %ecx
 ; AVX512F-32-NEXT:    shrl $13, %ecx
 ; AVX512F-32-NEXT:    andb $1, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $26, %k0, %k0
-; AVX512F-32-NEXT:    kxorq %k5, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $38, %k0, %k5
+; AVX512F-32-NEXT:    kshiftlq $63, %k5, %k5
+; AVX512F-32-NEXT:    kshiftrq $26, %k5, %k5
+; AVX512F-32-NEXT:    kxorq %k2, %k5, %k2
+; AVX512F-32-NEXT:    kshiftrq $38, %k2, %k5
 ; AVX512F-32-NEXT:    kxorq %k7, %k5, %k7
 ; AVX512F-32-NEXT:    kmovd %ecx, %k5
 ; AVX512F-32-NEXT:    movl %eax, %edx
@@ -2150,11 +2153,10 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    shrb %dl
 ; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
 ; AVX512F-32-NEXT:    kshiftrq $25, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k0, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $39, %k7, %k0
-; AVX512F-32-NEXT:    kxorq %k6, %k0, %k6
-; AVX512F-32-NEXT:    kmovd %edx, %k0
-; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp) # 8-byte Spill
+; AVX512F-32-NEXT:    kxorq %k2, %k7, %k7
+; AVX512F-32-NEXT:    kshiftrq $39, %k7, %k2
+; AVX512F-32-NEXT:    kxorq %k6, %k2, %k6
+; AVX512F-32-NEXT:    kmovd %edx, %k2
 ; AVX512F-32-NEXT:    movl %eax, %ecx
 ; AVX512F-32-NEXT:    andb $15, %cl
 ; AVX512F-32-NEXT:    movl %ecx, %edx
@@ -2165,46 +2167,44 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    kshiftrq $40, %k6, %k7
 ; AVX512F-32-NEXT:    kxorq %k1, %k7, %k7
 ; AVX512F-32-NEXT:    kmovd %ecx, %k1
-; AVX512F-32-NEXT:    movl %ebx, %ecx
+; AVX512F-32-NEXT:    kmovq %k1, {{[0-9]+}}(%esp) # 8-byte Spill
+; AVX512F-32-NEXT:    movzwl %bx, %ecx
+; AVX512F-32-NEXT:    movl %ecx, %esi
+; AVX512F-32-NEXT:    movl %ecx, %edi
 ; AVX512F-32-NEXT:    shrl $12, %ecx
-; AVX512F-32-NEXT:    andl $15, %ecx
 ; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
 ; AVX512F-32-NEXT:    kshiftrq $23, %k7, %k7
 ; AVX512F-32-NEXT:    kxorq %k6, %k7, %k6
 ; AVX512F-32-NEXT:    kshiftrq $41, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k2, %k7, %k2
-; AVX512F-32-NEXT:    kmovd %ecx, %k0
-; AVX512F-32-NEXT:    movl %ebx, %ecx
-; AVX512F-32-NEXT:    shrl $14, %ecx
-; AVX512F-32-NEXT:    andl $3, %ecx
-; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $22, %k2, %k2
-; AVX512F-32-NEXT:    kxorq %k6, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $42, %k2, %k6
+; AVX512F-32-NEXT:    kxorq %k0, %k7, %k0
+; AVX512F-32-NEXT:    kmovd %ecx, %k1
+; AVX512F-32-NEXT:    shrl $14, %edi
+; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
+; AVX512F-32-NEXT:    kshiftrq $22, %k0, %k0
+; AVX512F-32-NEXT:    kxorq %k6, %k0, %k0
+; AVX512F-32-NEXT:    kshiftrq $42, %k0, %k6
 ; AVX512F-32-NEXT:    kxorq %k3, %k6, %k3
-; AVX512F-32-NEXT:    kmovd %ecx, %k7
-; AVX512F-32-NEXT:    movl %ebx, %ecx
-; AVX512F-32-NEXT:    shrl $15, %ecx
-; AVX512F-32-NEXT:    andl $1, %ecx
+; AVX512F-32-NEXT:    kmovd %edi, %k7
+; AVX512F-32-NEXT:    shrl $15, %esi
 ; AVX512F-32-NEXT:    kshiftlq $63, %k3, %k3
 ; AVX512F-32-NEXT:    kshiftrq $21, %k3, %k3
-; AVX512F-32-NEXT:    kxorq %k2, %k3, %k2
-; AVX512F-32-NEXT:    kshiftrq $43, %k2, %k3
+; AVX512F-32-NEXT:    kxorq %k0, %k3, %k0
+; AVX512F-32-NEXT:    kshiftrq $43, %k0, %k3
 ; AVX512F-32-NEXT:    kxorq %k4, %k3, %k3
-; AVX512F-32-NEXT:    kmovd %ecx, %k6
+; AVX512F-32-NEXT:    kmovd %esi, %k6
 ; AVX512F-32-NEXT:    shrb $3, %dl
 ; AVX512F-32-NEXT:    kshiftlq $63, %k3, %k3
 ; AVX512F-32-NEXT:    kshiftrq $20, %k3, %k3
-; AVX512F-32-NEXT:    kxorq %k2, %k3, %k3
-; AVX512F-32-NEXT:    kshiftrq $44, %k3, %k2
-; AVX512F-32-NEXT:    kxorq %k0, %k2, %k0
-; AVX512F-32-NEXT:    kmovd %edx, %k2
+; AVX512F-32-NEXT:    kxorq %k0, %k3, %k3
+; AVX512F-32-NEXT:    kshiftrq $44, %k3, %k0
+; AVX512F-32-NEXT:    kxorq %k1, %k0, %k1
+; AVX512F-32-NEXT:    kmovd %edx, %k0
 ; AVX512F-32-NEXT:    movl %eax, %ecx
 ; AVX512F-32-NEXT:    shrb $4, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $19, %k0, %k0
-; AVX512F-32-NEXT:    kxorq %k3, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $45, %k0, %k3
+; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT:    kshiftrq $19, %k1, %k1
+; AVX512F-32-NEXT:    kxorq %k3, %k1, %k1
+; AVX512F-32-NEXT:    kshiftrq $45, %k1, %k3
 ; AVX512F-32-NEXT:    kxorq %k5, %k3, %k4
 ; AVX512F-32-NEXT:    kmovd %ecx, %k3
 ; AVX512F-32-NEXT:    movl %eax, %ecx
@@ -2212,21 +2212,21 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    andb $1, %cl
 ; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
 ; AVX512F-32-NEXT:    kshiftrq $18, %k4, %k4
-; AVX512F-32-NEXT:    kxorq %k0, %k4, %k0
-; AVX512F-32-NEXT:    kshiftrq $46, %k0, %k4
+; AVX512F-32-NEXT:    kxorq %k1, %k4, %k1
+; AVX512F-32-NEXT:    kshiftrq $46, %k1, %k4
 ; AVX512F-32-NEXT:    kxorq %k7, %k4, %k5
 ; AVX512F-32-NEXT:    kmovd %ecx, %k4
 ; AVX512F-32-NEXT:    movl %eax, %ecx
 ; AVX512F-32-NEXT:    shrb $6, %cl
 ; AVX512F-32-NEXT:    kshiftlq $63, %k5, %k5
 ; AVX512F-32-NEXT:    kshiftrq $17, %k5, %k5
-; AVX512F-32-NEXT:    kxorq %k0, %k5, %k0
-; AVX512F-32-NEXT:    kshiftrq $47, %k0, %k5
+; AVX512F-32-NEXT:    kxorq %k1, %k5, %k1
+; AVX512F-32-NEXT:    kshiftrq $47, %k1, %k5
 ; AVX512F-32-NEXT:    kxorq %k6, %k5, %k5
 ; AVX512F-32-NEXT:    kshiftlq $63, %k5, %k5
 ; AVX512F-32-NEXT:    kshiftrq $16, %k5, %k5
-; AVX512F-32-NEXT:    kxorq %k0, %k5, %k0
-; AVX512F-32-NEXT:    kshiftrq $48, %k0, %k5
+; AVX512F-32-NEXT:    kxorq %k1, %k5, %k1
+; AVX512F-32-NEXT:    kshiftrq $48, %k1, %k5
 ; AVX512F-32-NEXT:    kmovd %eax, %k6
 ; AVX512F-32-NEXT:    kxorq %k6, %k5, %k6
 ; AVX512F-32-NEXT:    kmovd %ecx, %k5
@@ -2236,17 +2236,17 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    shrb $7, %al
 ; AVX512F-32-NEXT:    kshiftlq $63, %k6, %k6
 ; AVX512F-32-NEXT:    kshiftrq $15, %k6, %k6
-; AVX512F-32-NEXT:    kxorq %k0, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $49, %k6, %k0
-; AVX512F-32-NEXT:    kmovq {{[0-9]+}}(%esp), %k7 # 8-byte Reload
-; AVX512F-32-NEXT:    kxorq %k7, %k0, %k7
-; AVX512F-32-NEXT:    kmovd %eax, %k0
+; AVX512F-32-NEXT:    kxorq %k1, %k6, %k1
+; AVX512F-32-NEXT:    kshiftrq $49, %k1, %k6
+; AVX512F-32-NEXT:    kxorq %k2, %k6, %k6
+; AVX512F-32-NEXT:    kmovd %eax, %k2
 ; AVX512F-32-NEXT:    movl %edx, %eax
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $14, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k6, %k7, %k6
-; AVX512F-32-NEXT:    kshiftrq $50, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k1, %k7, %k7
+; AVX512F-32-NEXT:    kshiftlq $63, %k6, %k6
+; AVX512F-32-NEXT:    kshiftrq $14, %k6, %k6
+; AVX512F-32-NEXT:    kxorq %k1, %k6, %k6
+; AVX512F-32-NEXT:    kshiftrq $50, %k6, %k1
+; AVX512F-32-NEXT:    kmovq {{[0-9]+}}(%esp), %k7 # 8-byte Reload
+; AVX512F-32-NEXT:    kxorq %k7, %k1, %k7
 ; AVX512F-32-NEXT:    kmovd %edx, %k1
 ; AVX512F-32-NEXT:    # kill: def %dl killed %dl killed %edx def %edx
 ; AVX512F-32-NEXT:    andb $15, %dl
@@ -2256,8 +2256,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    kshiftrq $13, %k7, %k7
 ; AVX512F-32-NEXT:    kxorq %k6, %k7, %k6
 ; AVX512F-32-NEXT:    kshiftrq $51, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k2, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %eax, %k2
+; AVX512F-32-NEXT:    kxorq %k0, %k7, %k7
+; AVX512F-32-NEXT:    kmovd %eax, %k0
 ; AVX512F-32-NEXT:    movl %edx, %eax
 ; AVX512F-32-NEXT:    shrb $2, %dl
 ; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
@@ -2286,20 +2286,20 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    kshiftrq $9, %k7, %k7
 ; AVX512F-32-NEXT:    kxorq %k6, %k7, %k6
 ; AVX512F-32-NEXT:    kshiftrq $55, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k0, %k7, %k0
-; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $8, %k0, %k0
-; AVX512F-32-NEXT:    kxorq %k6, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $56, %k0, %k6
+; AVX512F-32-NEXT:    kxorq %k2, %k7, %k2
+; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
+; AVX512F-32-NEXT:    kshiftrq $8, %k2, %k2
+; AVX512F-32-NEXT:    kxorq %k6, %k2, %k2
+; AVX512F-32-NEXT:    kshiftrq $56, %k2, %k6
 ; AVX512F-32-NEXT:    kxorq %k1, %k6, %k1
 ; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
 ; AVX512F-32-NEXT:    kshiftrq $7, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k0, %k1, %k0
-; AVX512F-32-NEXT:    kshiftrq $57, %k0, %k1
 ; AVX512F-32-NEXT:    kxorq %k2, %k1, %k1
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $6, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k0, %k1, %k0
+; AVX512F-32-NEXT:    kshiftrq $57, %k1, %k2
+; AVX512F-32-NEXT:    kxorq %k0, %k2, %k0
+; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
+; AVX512F-32-NEXT:    kshiftrq $6, %k0, %k0
+; AVX512F-32-NEXT:    kxorq %k1, %k0, %k0
 ; AVX512F-32-NEXT:    kshiftrq $58, %k0, %k1
 ; AVX512F-32-NEXT:    kxorq %k3, %k1, %k1
 ; AVX512F-32-NEXT:    movl %ebx, %eax
@@ -2362,11 +2362,13 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT:    addl %esi, %eax
+; AVX512F-32-NEXT:    addl %ebp, %eax
 ; AVX512F-32-NEXT:    adcl %ebx, %edx
 ; AVX512F-32-NEXT:    addl $60, %esp
 ; AVX512F-32-NEXT:    popl %esi
+; AVX512F-32-NEXT:    popl %edi
 ; AVX512F-32-NEXT:    popl %ebx
+; AVX512F-32-NEXT:    popl %ebp
 ; AVX512F-32-NEXT:    vzeroupper
 ; AVX512F-32-NEXT:    retl
   %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
@@ -2495,14 +2497,20 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ;
 ; AVX512F-32-LABEL: test_mask_x86_avx512_ucmp_b_512:
 ; AVX512F-32:       # %bb.0:
-; AVX512F-32-NEXT:    pushl %ebx
+; AVX512F-32-NEXT:    pushl %ebp
 ; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
-; AVX512F-32-NEXT:    pushl %esi
+; AVX512F-32-NEXT:    pushl %ebx
 ; AVX512F-32-NEXT:    .cfi_def_cfa_offset 12
+; AVX512F-32-NEXT:    pushl %edi
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 16
+; AVX512F-32-NEXT:    pushl %esi
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 20
 ; AVX512F-32-NEXT:    subl $60, %esp
-; AVX512F-32-NEXT:    .cfi_def_cfa_offset 72
-; AVX512F-32-NEXT:    .cfi_offset %esi, -12
-; AVX512F-32-NEXT:    .cfi_offset %ebx, -8
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 80
+; AVX512F-32-NEXT:    .cfi_offset %esi, -20
+; AVX512F-32-NEXT:    .cfi_offset %edi, -16
+; AVX512F-32-NEXT:    .cfi_offset %ebx, -12
+; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
 ; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrl $16, %eax
@@ -2542,7 +2550,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    kmovd %ecx, %k5
 ; AVX512F-32-NEXT:    movb %bh, %cl
 ; AVX512F-32-NEXT:    kmovd %ecx, %k1
-; AVX512F-32-NEXT:    movl %ebx, %esi
+; AVX512F-32-NEXT:    movl %ebx, %ebp
 ; AVX512F-32-NEXT:    andb $2, %cl
 ; AVX512F-32-NEXT:    shrb %cl
 ; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
@@ -2568,7 +2576,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    kshiftrq $5, %k7, %k0
 ; AVX512F-32-NEXT:    kxorq %k4, %k0, %k4
 ; AVX512F-32-NEXT:    kmovd %ecx, %k0
-; AVX512F-32-NEXT:    movl %esi, %ecx
+; AVX512F-32-NEXT:    movl %ebp, %ecx
 ; AVX512F-32-NEXT:    shrl $13, %ecx
 ; AVX512F-32-NEXT:    andb $1, %cl
 ; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
@@ -2613,9 +2621,10 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    movl %eax, %ecx
 ; AVX512F-32-NEXT:    shrb $5, %cl
 ; AVX512F-32-NEXT:    andb $1, %cl
-; AVX512F-32-NEXT:    movl %esi, %edx
+; AVX512F-32-NEXT:    movzwl %bp, %edx
+; AVX512F-32-NEXT:    movl %edx, %esi
+; AVX512F-32-NEXT:    movl %edx, %edi
 ; AVX512F-32-NEXT:    shrl $12, %edx
-; AVX512F-32-NEXT:    andl $15, %edx
 ; AVX512F-32-NEXT:    kshiftlq $63, %k3, %k3
 ; AVX512F-32-NEXT:    kshiftrq $53, %k3, %k3
 ; AVX512F-32-NEXT:    kxorq %k7, %k3, %k3
@@ -2630,28 +2639,24 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    kmovd %ecx, %k0
 ; AVX512F-32-NEXT:    movl %eax, %edx
 ; AVX512F-32-NEXT:    shrb $6, %dl
-; AVX512F-32-NEXT:    movl %esi, %ecx
-; AVX512F-32-NEXT:    shrl $14, %ecx
-; AVX512F-32-NEXT:    andl $3, %ecx
+; AVX512F-32-NEXT:    shrl $15, %esi
+; AVX512F-32-NEXT:    shrl $14, %edi
 ; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
 ; AVX512F-32-NEXT:    kshiftrq $51, %k7, %k7
 ; AVX512F-32-NEXT:    kxorq %k3, %k7, %k3
 ; AVX512F-32-NEXT:    kshiftrq $13, %k3, %k7
 ; AVX512F-32-NEXT:    kxorq %k4, %k7, %k4
-; AVX512F-32-NEXT:    kmovd %ecx, %k7
-; AVX512F-32-NEXT:    movl %esi, %ecx
-; AVX512F-32-NEXT:    shrl $15, %ecx
-; AVX512F-32-NEXT:    andl $1, %ecx
 ; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
 ; AVX512F-32-NEXT:    kshiftrq $50, %k4, %k4
 ; AVX512F-32-NEXT:    kxorq %k3, %k4, %k3
 ; AVX512F-32-NEXT:    kshiftrq $14, %k3, %k4
+; AVX512F-32-NEXT:    kmovd %edi, %k7
 ; AVX512F-32-NEXT:    kxorq %k7, %k4, %k4
 ; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
 ; AVX512F-32-NEXT:    kshiftrq $49, %k4, %k4
 ; AVX512F-32-NEXT:    kxorq %k3, %k4, %k3
 ; AVX512F-32-NEXT:    kshiftrq $15, %k3, %k4
-; AVX512F-32-NEXT:    kmovd %ecx, %k7
+; AVX512F-32-NEXT:    kmovd %esi, %k7
 ; AVX512F-32-NEXT:    kxorq %k7, %k4, %k4
 ; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
 ; AVX512F-32-NEXT:    kshiftrq $48, %k4, %k4
@@ -2660,7 +2665,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    kmovd %eax, %k7
 ; AVX512F-32-NEXT:    kxorq %k7, %k4, %k4
 ; AVX512F-32-NEXT:    kmovd %edx, %k7
-; AVX512F-32-NEXT:    movl %esi, %edx
+; AVX512F-32-NEXT:    movl %ebp, %edx
 ; AVX512F-32-NEXT:    shrl $24, %edx
 ; AVX512F-32-NEXT:    # kill: def %al killed %al killed %eax def %eax
 ; AVX512F-32-NEXT:    shrb $7, %al
@@ -2700,19 +2705,19 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    shrb $3, %cl
 ; AVX512F-32-NEXT:    kshiftlq $63, %k6, %k6
 ; AVX512F-32-NEXT:    kshiftrq $43, %k6, %k6
-; AVX512F-32-NEXT:    kxorq %k1, %k6, %k1
-; AVX512F-32-NEXT:    kshiftrq $21, %k1, %k6
-; AVX512F-32-NEXT:    kxorq %k0, %k6, %k6
-; AVX512F-32-NEXT:    kmovd %ecx, %k0
-; AVX512F-32-NEXT:    movl %esi, %ecx
-; AVX512F-32-NEXT:    shrl $29, %ecx
-; AVX512F-32-NEXT:    andb $1, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $42, %k6, %k6
 ; AVX512F-32-NEXT:    kxorq %k1, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $22, %k6, %k1
-; AVX512F-32-NEXT:    kxorq %k7, %k1, %k7
+; AVX512F-32-NEXT:    kshiftrq $21, %k6, %k1
+; AVX512F-32-NEXT:    kxorq %k0, %k1, %k0
 ; AVX512F-32-NEXT:    kmovd %ecx, %k1
+; AVX512F-32-NEXT:    movl %ebp, %ecx
+; AVX512F-32-NEXT:    shrl $29, %ecx
+; AVX512F-32-NEXT:    andb $1, %cl
+; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
+; AVX512F-32-NEXT:    kshiftrq $42, %k0, %k0
+; AVX512F-32-NEXT:    kxorq %k6, %k0, %k6
+; AVX512F-32-NEXT:    kshiftrq $22, %k6, %k0
+; AVX512F-32-NEXT:    kxorq %k7, %k0, %k7
+; AVX512F-32-NEXT:    kmovd %ecx, %k0
 ; AVX512F-32-NEXT:    movl %ebx, %edx
 ; AVX512F-32-NEXT:    andb $2, %dl
 ; AVX512F-32-NEXT:    shrb %dl
@@ -2743,68 +2748,68 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    shrb $4, %cl
 ; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
 ; AVX512F-32-NEXT:    kshiftrq $38, %k4, %k4
-; AVX512F-32-NEXT:    kxorq %k7, %k4, %k7
-; AVX512F-32-NEXT:    kshiftrq $26, %k7, %k4
-; AVX512F-32-NEXT:    kxorq %k2, %k4, %k2
-; AVX512F-32-NEXT:    kmovd %ecx, %k4
+; AVX512F-32-NEXT:    kxorq %k7, %k4, %k4
+; AVX512F-32-NEXT:    kshiftrq $26, %k4, %k7
+; AVX512F-32-NEXT:    kxorq %k2, %k7, %k7
+; AVX512F-32-NEXT:    kmovd %ecx, %k2
 ; AVX512F-32-NEXT:    movl %ebx, %ecx
 ; AVX512F-32-NEXT:    shrb $5, %cl
 ; AVX512F-32-NEXT:    andb $1, %cl
-; AVX512F-32-NEXT:    movl %esi, %edx
+; AVX512F-32-NEXT:    movl %ebp, %edx
 ; AVX512F-32-NEXT:    shrl $28, %edx
-; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $37, %k2, %k2
-; AVX512F-32-NEXT:    kxorq %k7, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $27, %k2, %k7
-; AVX512F-32-NEXT:    kxorq %k0, %k7, %k0
-; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $36, %k0, %k0
-; AVX512F-32-NEXT:    kxorq %k2, %k0, %k2
-; AVX512F-32-NEXT:    kshiftrq $28, %k2, %k0
+; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT:    kshiftrq $37, %k7, %k7
+; AVX512F-32-NEXT:    kxorq %k4, %k7, %k4
+; AVX512F-32-NEXT:    kshiftrq $27, %k4, %k7
+; AVX512F-32-NEXT:    kxorq %k1, %k7, %k1
+; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT:    kshiftrq $36, %k1, %k1
+; AVX512F-32-NEXT:    kxorq %k4, %k1, %k1
+; AVX512F-32-NEXT:    kshiftrq $28, %k1, %k4
 ; AVX512F-32-NEXT:    kmovd %edx, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k0, %k7
-; AVX512F-32-NEXT:    kmovd %ecx, %k0
+; AVX512F-32-NEXT:    kxorq %k7, %k4, %k7
+; AVX512F-32-NEXT:    kmovd %ecx, %k4
 ; AVX512F-32-NEXT:    movl %ebx, %edx
 ; AVX512F-32-NEXT:    shrb $6, %dl
-; AVX512F-32-NEXT:    movl %esi, %ecx
-; AVX512F-32-NEXT:    shrl $30, %ecx
+; AVX512F-32-NEXT:    movl %ebp, %ecx
+; AVX512F-32-NEXT:    shrl $31, %ecx
+; AVX512F-32-NEXT:    movl %ebp, %esi
+; AVX512F-32-NEXT:    shrl $30, %esi
 ; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
 ; AVX512F-32-NEXT:    kshiftrq $35, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k2, %k7, %k2
-; AVX512F-32-NEXT:    kshiftrq $29, %k2, %k7
 ; AVX512F-32-NEXT:    kxorq %k1, %k7, %k1
-; AVX512F-32-NEXT:    kmovd %ecx, %k7
-; AVX512F-32-NEXT:    movl %esi, %ecx
-; AVX512F-32-NEXT:    shrl $31, %ecx
+; AVX512F-32-NEXT:    kshiftrq $29, %k1, %k7
+; AVX512F-32-NEXT:    kxorq %k0, %k7, %k0
+; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
+; AVX512F-32-NEXT:    kshiftrq $34, %k0, %k0
+; AVX512F-32-NEXT:    kxorq %k1, %k0, %k0
+; AVX512F-32-NEXT:    kshiftrq $30, %k0, %k1
+; AVX512F-32-NEXT:    kmovd %esi, %k7
+; AVX512F-32-NEXT:    kxorq %k7, %k1, %k1
 ; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $34, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k2, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $30, %k1, %k2
-; AVX512F-32-NEXT:    kxorq %k7, %k2, %k2
-; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $33, %k2, %k2
-; AVX512F-32-NEXT:    kxorq %k1, %k2, %k1
-; AVX512F-32-NEXT:    kshiftrq $31, %k1, %k2
+; AVX512F-32-NEXT:    kshiftrq $33, %k1, %k1
+; AVX512F-32-NEXT:    kxorq %k0, %k1, %k0
+; AVX512F-32-NEXT:    kshiftrq $31, %k0, %k1
 ; AVX512F-32-NEXT:    kmovd %ecx, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k2, %k2
-; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $32, %k2, %k2
-; AVX512F-32-NEXT:    kxorq %k1, %k2, %k1
-; AVX512F-32-NEXT:    kshiftrq $32, %k1, %k2
+; AVX512F-32-NEXT:    kxorq %k7, %k1, %k1
+; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT:    kshiftrq $32, %k1, %k1
+; AVX512F-32-NEXT:    kxorq %k0, %k1, %k0
+; AVX512F-32-NEXT:    kshiftrq $32, %k0, %k1
 ; AVX512F-32-NEXT:    kmovd %ebx, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k2, %k2
+; AVX512F-32-NEXT:    kxorq %k7, %k1, %k1
 ; AVX512F-32-NEXT:    kmovd %edx, %k7
 ; AVX512F-32-NEXT:    movl %ebx, %ecx
 ; AVX512F-32-NEXT:    shrb $7, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $31, %k2, %k2
-; AVX512F-32-NEXT:    kxorq %k1, %k2, %k1
-; AVX512F-32-NEXT:    kshiftrq $33, %k1, %k2
-; AVX512F-32-NEXT:    kxorq %k5, %k2, %k2
-; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $30, %k2, %k2
-; AVX512F-32-NEXT:    kxorq %k1, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $34, %k2, %k1
+; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT:    kshiftrq $31, %k1, %k1
+; AVX512F-32-NEXT:    kxorq %k0, %k1, %k0
+; AVX512F-32-NEXT:    kshiftrq $33, %k0, %k1
+; AVX512F-32-NEXT:    kxorq %k5, %k1, %k1
+; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT:    kshiftrq $30, %k1, %k1
+; AVX512F-32-NEXT:    kxorq %k0, %k1, %k0
+; AVX512F-32-NEXT:    kshiftrq $34, %k0, %k1
 ; AVX512F-32-NEXT:    kxorq %k6, %k1, %k5
 ; AVX512F-32-NEXT:    kmovd %ecx, %k6
 ; AVX512F-32-NEXT:    movb %bh, %cl
@@ -2813,34 +2818,34 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    shrb %cl
 ; AVX512F-32-NEXT:    kshiftlq $63, %k5, %k5
 ; AVX512F-32-NEXT:    kshiftrq $29, %k5, %k5
-; AVX512F-32-NEXT:    kxorq %k2, %k5, %k5
-; AVX512F-32-NEXT:    kshiftrq $35, %k5, %k2
-; AVX512F-32-NEXT:    kxorq %k3, %k2, %k3
-; AVX512F-32-NEXT:    kmovd %ecx, %k2
+; AVX512F-32-NEXT:    kxorq %k0, %k5, %k5
+; AVX512F-32-NEXT:    kshiftrq $35, %k5, %k0
+; AVX512F-32-NEXT:    kxorq %k3, %k0, %k3
+; AVX512F-32-NEXT:    kmovd %ecx, %k0
 ; AVX512F-32-NEXT:    movl %eax, %ecx
 ; AVX512F-32-NEXT:    shrb $2, %al
 ; AVX512F-32-NEXT:    kshiftlq $63, %k3, %k3
 ; AVX512F-32-NEXT:    kshiftrq $28, %k3, %k3
 ; AVX512F-32-NEXT:    kxorq %k5, %k3, %k5
 ; AVX512F-32-NEXT:    kshiftrq $36, %k5, %k3
-; AVX512F-32-NEXT:    kxorq %k4, %k3, %k4
+; AVX512F-32-NEXT:    kxorq %k2, %k3, %k2
 ; AVX512F-32-NEXT:    kmovd %eax, %k3
 ; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrl $16, %eax
 ; AVX512F-32-NEXT:    shrb $3, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
-; AVX512F-32-NEXT:    kshiftrq $27, %k4, %k4
-; AVX512F-32-NEXT:    kxorq %k5, %k4, %k5
-; AVX512F-32-NEXT:    kshiftrq $37, %k5, %k4
-; AVX512F-32-NEXT:    kxorq %k0, %k4, %k0
+; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
+; AVX512F-32-NEXT:    kshiftrq $27, %k2, %k2
+; AVX512F-32-NEXT:    kxorq %k5, %k2, %k2
+; AVX512F-32-NEXT:    kshiftrq $37, %k2, %k5
+; AVX512F-32-NEXT:    kxorq %k4, %k5, %k5
 ; AVX512F-32-NEXT:    kmovd %ecx, %k4
 ; AVX512F-32-NEXT:    movl %ebx, %ecx
 ; AVX512F-32-NEXT:    shrl $13, %ecx
 ; AVX512F-32-NEXT:    andb $1, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $26, %k0, %k0
-; AVX512F-32-NEXT:    kxorq %k5, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $38, %k0, %k5
+; AVX512F-32-NEXT:    kshiftlq $63, %k5, %k5
+; AVX512F-32-NEXT:    kshiftrq $26, %k5, %k5
+; AVX512F-32-NEXT:    kxorq %k2, %k5, %k2
+; AVX512F-32-NEXT:    kshiftrq $38, %k2, %k5
 ; AVX512F-32-NEXT:    kxorq %k7, %k5, %k7
 ; AVX512F-32-NEXT:    kmovd %ecx, %k5
 ; AVX512F-32-NEXT:    movl %eax, %edx
@@ -2848,11 +2853,10 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    shrb %dl
 ; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
 ; AVX512F-32-NEXT:    kshiftrq $25, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k0, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $39, %k7, %k0
-; AVX512F-32-NEXT:    kxorq %k6, %k0, %k6
-; AVX512F-32-NEXT:    kmovd %edx, %k0
-; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp) # 8-byte Spill
+; AVX512F-32-NEXT:    kxorq %k2, %k7, %k7
+; AVX512F-32-NEXT:    kshiftrq $39, %k7, %k2
+; AVX512F-32-NEXT:    kxorq %k6, %k2, %k6
+; AVX512F-32-NEXT:    kmovd %edx, %k2
 ; AVX512F-32-NEXT:    movl %eax, %ecx
 ; AVX512F-32-NEXT:    andb $15, %cl
 ; AVX512F-32-NEXT:    movl %ecx, %edx
@@ -2863,46 +2867,44 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    kshiftrq $40, %k6, %k7
 ; AVX512F-32-NEXT:    kxorq %k1, %k7, %k7
 ; AVX512F-32-NEXT:    kmovd %ecx, %k1
-; AVX512F-32-NEXT:    movl %ebx, %ecx
+; AVX512F-32-NEXT:    kmovq %k1, {{[0-9]+}}(%esp) # 8-byte Spill
+; AVX512F-32-NEXT:    movzwl %bx, %ecx
+; AVX512F-32-NEXT:    movl %ecx, %esi
+; AVX512F-32-NEXT:    movl %ecx, %edi
 ; AVX512F-32-NEXT:    shrl $12, %ecx
-; AVX512F-32-NEXT:    andl $15, %ecx
 ; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
 ; AVX512F-32-NEXT:    kshiftrq $23, %k7, %k7
 ; AVX512F-32-NEXT:    kxorq %k6, %k7, %k6
 ; AVX512F-32-NEXT:    kshiftrq $41, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k2, %k7, %k2
-; AVX512F-32-NEXT:    kmovd %ecx, %k0
-; AVX512F-32-NEXT:    movl %ebx, %ecx
-; AVX512F-32-NEXT:    shrl $14, %ecx
-; AVX512F-32-NEXT:    andl $3, %ecx
-; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $22, %k2, %k2
-; AVX512F-32-NEXT:    kxorq %k6, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $42, %k2, %k6
+; AVX512F-32-NEXT:    kxorq %k0, %k7, %k0
+; AVX512F-32-NEXT:    kmovd %ecx, %k1
+; AVX512F-32-NEXT:    shrl $14, %edi
+; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
+; AVX512F-32-NEXT:    kshiftrq $22, %k0, %k0
+; AVX512F-32-NEXT:    kxorq %k6, %k0, %k0
+; AVX512F-32-NEXT:    kshiftrq $42, %k0, %k6
 ; AVX512F-32-NEXT:    kxorq %k3, %k6, %k3
-; AVX512F-32-NEXT:    kmovd %ecx, %k7
-; AVX512F-32-NEXT:    movl %ebx, %ecx
-; AVX512F-32-NEXT:    shrl $15, %ecx
-; AVX512F-32-NEXT:    andl $1, %ecx
+; AVX512F-32-NEXT:    kmovd %edi, %k7
+; AVX512F-32-NEXT:    shrl $15, %esi
 ; AVX512F-32-NEXT:    kshiftlq $63, %k3, %k3
 ; AVX512F-32-NEXT:    kshiftrq $21, %k3, %k3
-; AVX512F-32-NEXT:    kxorq %k2, %k3, %k2
-; AVX512F-32-NEXT:    kshiftrq $43, %k2, %k3
+; AVX512F-32-NEXT:    kxorq %k0, %k3, %k0
+; AVX512F-32-NEXT:    kshiftrq $43, %k0, %k3
 ; AVX512F-32-NEXT:    kxorq %k4, %k3, %k3
-; AVX512F-32-NEXT:    kmovd %ecx, %k6
+; AVX512F-32-NEXT:    kmovd %esi, %k6
 ; AVX512F-32-NEXT:    shrb $3, %dl
 ; AVX512F-32-NEXT:    kshiftlq $63, %k3, %k3
 ; AVX512F-32-NEXT:    kshiftrq $20, %k3, %k3
-; AVX512F-32-NEXT:    kxorq %k2, %k3, %k3
-; AVX512F-32-NEXT:    kshiftrq $44, %k3, %k2
-; AVX512F-32-NEXT:    kxorq %k0, %k2, %k0
-; AVX512F-32-NEXT:    kmovd %edx, %k2
+; AVX512F-32-NEXT:    kxorq %k0, %k3, %k3
+; AVX512F-32-NEXT:    kshiftrq $44, %k3, %k0
+; AVX512F-32-NEXT:    kxorq %k1, %k0, %k1
+; AVX512F-32-NEXT:    kmovd %edx, %k0
 ; AVX512F-32-NEXT:    movl %eax, %ecx
 ; AVX512F-32-NEXT:    shrb $4, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $19, %k0, %k0
-; AVX512F-32-NEXT:    kxorq %k3, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $45, %k0, %k3
+; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT:    kshiftrq $19, %k1, %k1
+; AVX512F-32-NEXT:    kxorq %k3, %k1, %k1
+; AVX512F-32-NEXT:    kshiftrq $45, %k1, %k3
 ; AVX512F-32-NEXT:    kxorq %k5, %k3, %k4
 ; AVX512F-32-NEXT:    kmovd %ecx, %k3
 ; AVX512F-32-NEXT:    movl %eax, %ecx
@@ -2910,21 +2912,21 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    andb $1, %cl
 ; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
 ; AVX512F-32-NEXT:    kshiftrq $18, %k4, %k4
-; AVX512F-32-NEXT:    kxorq %k0, %k4, %k0
-; AVX512F-32-NEXT:    kshiftrq $46, %k0, %k4
+; AVX512F-32-NEXT:    kxorq %k1, %k4, %k1
+; AVX512F-32-NEXT:    kshiftrq $46, %k1, %k4
 ; AVX512F-32-NEXT:    kxorq %k7, %k4, %k5
 ; AVX512F-32-NEXT:    kmovd %ecx, %k4
 ; AVX512F-32-NEXT:    movl %eax, %ecx
 ; AVX512F-32-NEXT:    shrb $6, %cl
 ; AVX512F-32-NEXT:    kshiftlq $63, %k5, %k5
 ; AVX512F-32-NEXT:    kshiftrq $17, %k5, %k5
-; AVX512F-32-NEXT:    kxorq %k0, %k5, %k0
-; AVX512F-32-NEXT:    kshiftrq $47, %k0, %k5
+; AVX512F-32-NEXT:    kxorq %k1, %k5, %k1
+; AVX512F-32-NEXT:    kshiftrq $47, %k1, %k5
 ; AVX512F-32-NEXT:    kxorq %k6, %k5, %k5
 ; AVX512F-32-NEXT:    kshiftlq $63, %k5, %k5
 ; AVX512F-32-NEXT:    kshiftrq $16, %k5, %k5
-; AVX512F-32-NEXT:    kxorq %k0, %k5, %k0
-; AVX512F-32-NEXT:    kshiftrq $48, %k0, %k5
+; AVX512F-32-NEXT:    kxorq %k1, %k5, %k1
+; AVX512F-32-NEXT:    kshiftrq $48, %k1, %k5
 ; AVX512F-32-NEXT:    kmovd %eax, %k6
 ; AVX512F-32-NEXT:    kxorq %k6, %k5, %k6
 ; AVX512F-32-NEXT:    kmovd %ecx, %k5
@@ -2934,17 +2936,17 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    shrb $7, %al
 ; AVX512F-32-NEXT:    kshiftlq $63, %k6, %k6
 ; AVX512F-32-NEXT:    kshiftrq $15, %k6, %k6
-; AVX512F-32-NEXT:    kxorq %k0, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $49, %k6, %k0
-; AVX512F-32-NEXT:    kmovq {{[0-9]+}}(%esp), %k7 # 8-byte Reload
-; AVX512F-32-NEXT:    kxorq %k7, %k0, %k7
-; AVX512F-32-NEXT:    kmovd %eax, %k0
+; AVX512F-32-NEXT:    kxorq %k1, %k6, %k1
+; AVX512F-32-NEXT:    kshiftrq $49, %k1, %k6
+; AVX512F-32-NEXT:    kxorq %k2, %k6, %k6
+; AVX512F-32-NEXT:    kmovd %eax, %k2
 ; AVX512F-32-NEXT:    movl %edx, %eax
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $14, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k6, %k7, %k6
-; AVX512F-32-NEXT:    kshiftrq $50, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k1, %k7, %k7
+; AVX512F-32-NEXT:    kshiftlq $63, %k6, %k6
+; AVX512F-32-NEXT:    kshiftrq $14, %k6, %k6
+; AVX512F-32-NEXT:    kxorq %k1, %k6, %k6
+; AVX512F-32-NEXT:    kshiftrq $50, %k6, %k1
+; AVX512F-32-NEXT:    kmovq {{[0-9]+}}(%esp), %k7 # 8-byte Reload
+; AVX512F-32-NEXT:    kxorq %k7, %k1, %k7
 ; AVX512F-32-NEXT:    kmovd %edx, %k1
 ; AVX512F-32-NEXT:    # kill: def %dl killed %dl killed %edx def %edx
 ; AVX512F-32-NEXT:    andb $15, %dl
@@ -2954,8 +2956,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    kshiftrq $13, %k7, %k7
 ; AVX512F-32-NEXT:    kxorq %k6, %k7, %k6
 ; AVX512F-32-NEXT:    kshiftrq $51, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k2, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %eax, %k2
+; AVX512F-32-NEXT:    kxorq %k0, %k7, %k7
+; AVX512F-32-NEXT:    kmovd %eax, %k0
 ; AVX512F-32-NEXT:    movl %edx, %eax
 ; AVX512F-32-NEXT:    shrb $2, %dl
 ; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
@@ -2984,20 +2986,20 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    kshiftrq $9, %k7, %k7
 ; AVX512F-32-NEXT:    kxorq %k6, %k7, %k6
 ; AVX512F-32-NEXT:    kshiftrq $55, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k0, %k7, %k0
-; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $8, %k0, %k0
-; AVX512F-32-NEXT:    kxorq %k6, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $56, %k0, %k6
+; AVX512F-32-NEXT:    kxorq %k2, %k7, %k2
+; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
+; AVX512F-32-NEXT:    kshiftrq $8, %k2, %k2
+; AVX512F-32-NEXT:    kxorq %k6, %k2, %k2
+; AVX512F-32-NEXT:    kshiftrq $56, %k2, %k6
 ; AVX512F-32-NEXT:    kxorq %k1, %k6, %k1
 ; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
 ; AVX512F-32-NEXT:    kshiftrq $7, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k0, %k1, %k0
-; AVX512F-32-NEXT:    kshiftrq $57, %k0, %k1
 ; AVX512F-32-NEXT:    kxorq %k2, %k1, %k1
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $6, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k0, %k1, %k0
+; AVX512F-32-NEXT:    kshiftrq $57, %k1, %k2
+; AVX512F-32-NEXT:    kxorq %k0, %k2, %k0
+; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
+; AVX512F-32-NEXT:    kshiftrq $6, %k0, %k0
+; AVX512F-32-NEXT:    kxorq %k1, %k0, %k0
 ; AVX512F-32-NEXT:    kshiftrq $58, %k0, %k1
 ; AVX512F-32-NEXT:    kxorq %k3, %k1, %k1
 ; AVX512F-32-NEXT:    movl %ebx, %eax
@@ -3060,11 +3062,13 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT:    addl %esi, %eax
+; AVX512F-32-NEXT:    addl %ebp, %eax
 ; AVX512F-32-NEXT:    adcl %ebx, %edx
 ; AVX512F-32-NEXT:    addl $60, %esp
 ; AVX512F-32-NEXT:    popl %esi
+; AVX512F-32-NEXT:    popl %edi
 ; AVX512F-32-NEXT:    popl %ebx
+; AVX512F-32-NEXT:    popl %ebp
 ; AVX512F-32-NEXT:    vzeroupper
 ; AVX512F-32-NEXT:    retl
   %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)

Modified: llvm/trunk/test/CodeGen/X86/zext-demanded.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/zext-demanded.ll?rev=323175&r1=323174&r2=323175&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/zext-demanded.ll (original)
+++ llvm/trunk/test/CodeGen/X86/zext-demanded.ll Mon Jan 22 21:45:52 2018
@@ -27,51 +27,45 @@ define i32 @test2(i32 %x) {
   ret i32 %z
 }
 
-; TODO: We need to stop moving the and across the shift to get a movzx
 define i32 @test3(i32 %x) {
 ; CHECK-LABEL: test3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    shrl %edi
-; CHECK-NEXT:    andl $127, %edi
-; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    shrl %eax
 ; CHECK-NEXT:    retq
   %y = and i32 %x, 255
   %z = lshr i32 %y, 1
   ret i32 %z
 }
 
-; TODO: We need to stop moving the and across the shift to get a movzx
 define i16 @test4(i16 %x) {
 ; CHECK-LABEL: test4:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    shrl %edi
-; CHECK-NEXT:    andl $127, %edi
-; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    shrl %eax
+; CHECK-NEXT:    # kill: def %ax killed %ax killed %eax
 ; CHECK-NEXT:    retq
   %y = and i16 %x, 255
   %z = lshr i16 %y, 1
   ret i16 %z
 }
 
-; TODO: We need to stop moving the and across the shift to get a movzx
 define i16 @test5(i16 %x) {
 ; CHECK-LABEL: test5:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    shrl $9, %edi
-; CHECK-NEXT:    andl $127, %edi
-; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movzwl %di, %eax
+; CHECK-NEXT:    shrl $9, %eax
+; CHECK-NEXT:    # kill: def %ax killed %ax killed %eax
 ; CHECK-NEXT:    retq
   %y = lshr i16 %x, 9
   ret i16 %y
 }
 
-; TODO: We need to stop moving the and across the shift to get a movzx
 define i32 @test6(i32 %x) {
 ; CHECK-LABEL: test6:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    shrl $9, %edi
-; CHECK-NEXT:    andl $127, %edi
-; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movzwl %di, %eax
+; CHECK-NEXT:    shrl $9, %eax
 ; CHECK-NEXT:    retq
   %y = and i32 %x, 65535
   %z = lshr i32 %y, 9




More information about the llvm-commits mailing list