[llvm] r324057 - [X86] Legalize (v64i1 (bitcast (i64 X))) on 32-bit targets by extracting 32-bit halves from the i64, bitcasting each to v32i1, and concatenating.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 1 21:59:33 PST 2018


Author: ctopper
Date: Thu Feb  1 21:59:33 2018
New Revision: 324057

URL: http://llvm.org/viewvc/llvm-project?rev=324057&view=rev
Log:
[X86] Legalize (v64i1 (bitcast (i64 X))) on 32-bit targets by extracting 32-bit halves from the i64, bitcasting each to v32i1, and concatenating.

This prevents the scalarization that would otherwise occur.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
    llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=324057&r1=324056&r2=324057&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Feb  1 21:59:33 2018
@@ -23602,6 +23602,22 @@ static SDValue LowerBITCAST(SDValue Op,
   MVT SrcVT = Op.getOperand(0).getSimpleValueType();
   MVT DstVT = Op.getSimpleValueType();
 
+  // Legalize (v64i1 (bitcast (i64 X))) by splitting the i64, bitcasting each
+  // half to v32i1 and concatenating the result.
+  if (SrcVT == MVT::i64 && DstVT == MVT::v64i1) {
+    assert(!Subtarget.is64Bit() && "Expected 32-bit mode");
+    assert(Subtarget.hasBWI() && "Expected BWI target");
+    SDValue Op0 = Op->getOperand(0);
+    SDLoc dl(Op);
+    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0,
+                             DAG.getIntPtrConstant(0, dl));
+    Lo = DAG.getBitcast(MVT::v32i1, Lo);
+    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0,
+                             DAG.getIntPtrConstant(1, dl));
+    Hi = DAG.getBitcast(MVT::v32i1, Hi);
+    return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi);
+  }
+
   if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
       SrcVT == MVT::i64) {
     assert(Subtarget.hasSSE2() && "Requires at least SSE2!");

Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll?rev=324057&r1=324056&r2=324057&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll Thu Feb  1 21:59:33 2018
@@ -107,535 +107,11 @@ entry:
 define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext %__A)  {
 ; X32-LABEL: test_mm512_mask_set1_epi8:
 ; X32:       # %bb.0: # %entry
-; X32-NEXT:    pushl %ebx
-; X32-NEXT:    .cfi_def_cfa_offset 8
-; X32-NEXT:    pushl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 12
-; X32-NEXT:    .cfi_offset %esi, -12
-; X32-NEXT:    .cfi_offset %ebx, -8
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    kmovd %eax, %k0
-; X32-NEXT:    kshiftrq $1, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andb $2, %cl
-; X32-NEXT:    shrb %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $62, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $2, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $61, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $3, %k0, %k1
-; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $60, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $4, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $4, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $59, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $5, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $5, %cl
-; X32-NEXT:    andb $1, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $58, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $6, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $6, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $57, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $7, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $7, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $56, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $8, %k0, %k1
-; X32-NEXT:    movb %ah, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $55, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $9, %k0, %k1
-; X32-NEXT:    andb $2, %cl
-; X32-NEXT:    shrb %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $54, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $10, %k0, %k1
-; X32-NEXT:    movb %ah, %cl
-; X32-NEXT:    movl %eax, %esi
-; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    movzwl %si, %edx
-; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    shrl $12, %ecx
-; X32-NEXT:    kmovd %ecx, %k3
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrl $13, %ecx
-; X32-NEXT:    andb $1, %cl
-; X32-NEXT:    kmovd %ecx, %k4
-; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    shrl $14, %ecx
-; X32-NEXT:    kmovd %ecx, %k5
-; X32-NEXT:    shrl $16, %eax
-; X32-NEXT:    movl %eax, %ebx
-; X32-NEXT:    andb $2, %bl
-; X32-NEXT:    shrb %bl
-; X32-NEXT:    kmovd %ebx, %k6
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %ebx
-; X32-NEXT:    shrb $2, %bl
-; X32-NEXT:    kmovd %ebx, %k7
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $53, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $11, %k0, %k1
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $52, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $12, %k0, %k1
-; X32-NEXT:    kxorq %k3, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $51, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $13, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $50, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $14, %k0, %k1
-; X32-NEXT:    kxorq %k5, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $49, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $15, %k0, %k1
-; X32-NEXT:    shrl $15, %edx
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $48, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $16, %k0, %k1
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $47, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $17, %k0, %k1
-; X32-NEXT:    kxorq %k6, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $46, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $18, %k0, %k1
-; X32-NEXT:    kxorq %k7, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $45, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $19, %k0, %k1
-; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $44, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $20, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $4, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $43, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $21, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $5, %cl
-; X32-NEXT:    andb $1, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $42, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $22, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $6, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $41, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $23, %k0, %k1
-; X32-NEXT:    shrb $7, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $40, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $24, %k0, %k1
-; X32-NEXT:    movl %esi, %edx
-; X32-NEXT:    shrl $24, %edx
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $39, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $25, %k0, %k1
-; X32-NEXT:    movl %edx, %eax
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $38, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $26, %k0, %k1
-; X32-NEXT:    andb $15, %dl
-; X32-NEXT:    movl %edx, %eax
-; X32-NEXT:    shrb $2, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $37, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $27, %k0, %k1
-; X32-NEXT:    shrb $3, %dl
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $36, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $28, %k0, %k1
-; X32-NEXT:    movl %esi, %ecx
-; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    shrl $28, %eax
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $35, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $29, %k0, %k1
-; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    shrl $29, %eax
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $34, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $30, %k0, %k1
-; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    shrl $30, %eax
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $33, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $31, %k0, %k1
-; X32-NEXT:    shrl $31, %ecx
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $32, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $32, %k0, %k1
-; X32-NEXT:    kmovd %ebx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $31, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $33, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $30, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $34, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $2, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $29, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $35, %k0, %k1
-; X32-NEXT:    shrb $3, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $28, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $36, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrb $4, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $27, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $37, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrb $5, %al
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $26, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $38, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrb $6, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $25, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $39, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrb $7, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $24, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $40, %k0, %k1
-; X32-NEXT:    movb %bh, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    movb %bh, %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $2, %cl
-; X32-NEXT:    kmovd %ecx, %k3
-; X32-NEXT:    shrb $3, %al
-; X32-NEXT:    kmovd %eax, %k4
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrl $13, %eax
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k5
-; X32-NEXT:    movl %ebx, %ecx
-; X32-NEXT:    shrl $16, %ecx
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k6
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    andb $15, %dl
-; X32-NEXT:    movl %edx, %eax
-; X32-NEXT:    shrb $2, %al
-; X32-NEXT:    kmovd %eax, %k7
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $23, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $41, %k0, %k1
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $22, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $42, %k0, %k1
-; X32-NEXT:    kxorq %k3, %k1, %k1
-; X32-NEXT:    movzwl %bx, %eax
-; X32-NEXT:    movl %eax, %esi
-; X32-NEXT:    shrl $12, %esi
-; X32-NEXT:    kmovd %esi, %k2
-; X32-NEXT:    movl %eax, %esi
-; X32-NEXT:    shrl $14, %esi
-; X32-NEXT:    kmovd %esi, %k3
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $21, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $43, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
-; X32-NEXT:    shrl $15, %eax
-; X32-NEXT:    kmovd %eax, %k4
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $20, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $44, %k0, %k1
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $19, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $45, %k0, %k1
-; X32-NEXT:    kxorq %k5, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $18, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $46, %k0, %k1
-; X32-NEXT:    kxorq %k3, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $17, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $47, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $16, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $48, %k0, %k1
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $15, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $49, %k0, %k1
-; X32-NEXT:    kxorq %k6, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $14, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $50, %k0, %k1
-; X32-NEXT:    kxorq %k7, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $13, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $51, %k0, %k1
-; X32-NEXT:    shrb $3, %dl
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $12, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k4
-; X32-NEXT:    kshiftrq $52, %k4, %k0
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $4, %al
-; X32-NEXT:    kmovd %eax, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k5
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $5, %al
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k6
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $6, %al
-; X32-NEXT:    kmovd %eax, %k7
-; X32-NEXT:    shrb $7, %cl
-; X32-NEXT:    kmovd %ecx, %k0
-; X32-NEXT:    movl %ebx, %ecx
-; X32-NEXT:    shrl $24, %ecx
-; X32-NEXT:    kmovd %ecx, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $2, %al
-; X32-NEXT:    kmovd %eax, %k3
-; X32-NEXT:    kshiftlq $63, %k5, %k5
-; X32-NEXT:    kshiftrq $11, %k5, %k5
-; X32-NEXT:    kxorq %k5, %k4, %k4
-; X32-NEXT:    kshiftrq $53, %k4, %k5
-; X32-NEXT:    kxorq %k6, %k5, %k5
-; X32-NEXT:    kshiftlq $63, %k5, %k5
-; X32-NEXT:    kshiftrq $10, %k5, %k5
-; X32-NEXT:    kxorq %k5, %k4, %k5
-; X32-NEXT:    kshiftrq $54, %k5, %k4
-; X32-NEXT:    kxorq %k7, %k4, %k6
-; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k4
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrl $29, %eax
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k7
-; X32-NEXT:    kshiftlq $63, %k6, %k6
-; X32-NEXT:    kshiftrq $9, %k6, %k6
-; X32-NEXT:    kxorq %k6, %k5, %k5
-; X32-NEXT:    kshiftrq $55, %k5, %k6
-; X32-NEXT:    kxorq %k0, %k6, %k0
-; X32-NEXT:    kshiftlq $63, %k0, %k0
-; X32-NEXT:    kshiftrq $8, %k0, %k0
-; X32-NEXT:    kxorq %k0, %k5, %k0
-; X32-NEXT:    kshiftrq $56, %k0, %k5
-; X32-NEXT:    kxorq %k1, %k5, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrl $28, %eax
-; X32-NEXT:    kmovd %eax, %k5
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrl $30, %eax
-; X32-NEXT:    kmovd %eax, %k6
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $7, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $57, %k0, %k1
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $6, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $58, %k0, %k1
-; X32-NEXT:    kxorq %k3, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $5, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $59, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $4, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $60, %k0, %k1
-; X32-NEXT:    kxorq %k5, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $3, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $61, %k0, %k1
-; X32-NEXT:    kxorq %k7, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $2, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $62, %k0, %k1
-; X32-NEXT:    kxorq %k6, %k1, %k1
-; X32-NEXT:    shrl $31, %ebx
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
+; X32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $1, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftlq $1, %k0, %k0
-; X32-NEXT:    kshiftrq $1, %k0, %k0
-; X32-NEXT:    kshiftlq $63, %k2, %k1
-; X32-NEXT:    korq %k1, %k0, %k1
+; X32-NEXT:    kunpckdq %k1, %k0, %k1
 ; X32-NEXT:    vpbroadcastb %eax, %zmm0 {%k1}
-; X32-NEXT:    popl %esi
-; X32-NEXT:    popl %ebx
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm512_mask_set1_epi8:
@@ -656,535 +132,11 @@ define <8 x i64> @test_mm512_mask_set1_e
 define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A)  {
 ; X32-LABEL: test_mm512_maskz_set1_epi8:
 ; X32:       # %bb.0: # %entry
-; X32-NEXT:    pushl %ebx
-; X32-NEXT:    .cfi_def_cfa_offset 8
-; X32-NEXT:    pushl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 12
-; X32-NEXT:    .cfi_offset %esi, -12
-; X32-NEXT:    .cfi_offset %ebx, -8
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    kmovd %eax, %k0
-; X32-NEXT:    kshiftrq $1, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andb $2, %cl
-; X32-NEXT:    shrb %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $62, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $2, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $61, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $3, %k0, %k1
-; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $60, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $4, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $4, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $59, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $5, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $5, %cl
-; X32-NEXT:    andb $1, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $58, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $6, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $6, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $57, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $7, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $7, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $56, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $8, %k0, %k1
-; X32-NEXT:    movb %ah, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $55, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $9, %k0, %k1
-; X32-NEXT:    andb $2, %cl
-; X32-NEXT:    shrb %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $54, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $10, %k0, %k1
-; X32-NEXT:    movb %ah, %cl
-; X32-NEXT:    movl %eax, %esi
-; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    movzwl %si, %edx
-; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    shrl $12, %ecx
-; X32-NEXT:    kmovd %ecx, %k3
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrl $13, %ecx
-; X32-NEXT:    andb $1, %cl
-; X32-NEXT:    kmovd %ecx, %k4
-; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    shrl $14, %ecx
-; X32-NEXT:    kmovd %ecx, %k5
-; X32-NEXT:    shrl $16, %eax
-; X32-NEXT:    movl %eax, %ebx
-; X32-NEXT:    andb $2, %bl
-; X32-NEXT:    shrb %bl
-; X32-NEXT:    kmovd %ebx, %k6
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %ebx
-; X32-NEXT:    shrb $2, %bl
-; X32-NEXT:    kmovd %ebx, %k7
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $53, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $11, %k0, %k1
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $52, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $12, %k0, %k1
-; X32-NEXT:    kxorq %k3, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $51, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $13, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $50, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $14, %k0, %k1
-; X32-NEXT:    kxorq %k5, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $49, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $15, %k0, %k1
-; X32-NEXT:    shrl $15, %edx
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $48, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $16, %k0, %k1
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $47, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $17, %k0, %k1
-; X32-NEXT:    kxorq %k6, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $46, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $18, %k0, %k1
-; X32-NEXT:    kxorq %k7, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $45, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $19, %k0, %k1
-; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $44, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $20, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $4, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $43, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $21, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $5, %cl
-; X32-NEXT:    andb $1, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $42, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $22, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $6, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $41, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $23, %k0, %k1
-; X32-NEXT:    shrb $7, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $40, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $24, %k0, %k1
-; X32-NEXT:    movl %esi, %edx
-; X32-NEXT:    shrl $24, %edx
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $39, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $25, %k0, %k1
-; X32-NEXT:    movl %edx, %eax
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $38, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $26, %k0, %k1
-; X32-NEXT:    andb $15, %dl
-; X32-NEXT:    movl %edx, %eax
-; X32-NEXT:    shrb $2, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $37, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $27, %k0, %k1
-; X32-NEXT:    shrb $3, %dl
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $36, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $28, %k0, %k1
-; X32-NEXT:    movl %esi, %ecx
-; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    shrl $28, %eax
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $35, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $29, %k0, %k1
-; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    shrl $29, %eax
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $34, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $30, %k0, %k1
-; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    shrl $30, %eax
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $33, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $31, %k0, %k1
-; X32-NEXT:    shrl $31, %ecx
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $32, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $32, %k0, %k1
-; X32-NEXT:    kmovd %ebx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $31, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $33, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $30, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $34, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $2, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $29, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $35, %k0, %k1
-; X32-NEXT:    shrb $3, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $28, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $36, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrb $4, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $27, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $37, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrb $5, %al
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $26, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $38, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrb $6, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $25, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $39, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrb $7, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $24, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $40, %k0, %k1
-; X32-NEXT:    movb %bh, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    movb %bh, %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $2, %cl
-; X32-NEXT:    kmovd %ecx, %k3
-; X32-NEXT:    shrb $3, %al
-; X32-NEXT:    kmovd %eax, %k4
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrl $13, %eax
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k5
-; X32-NEXT:    movl %ebx, %ecx
-; X32-NEXT:    shrl $16, %ecx
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k6
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    andb $15, %dl
-; X32-NEXT:    movl %edx, %eax
-; X32-NEXT:    shrb $2, %al
-; X32-NEXT:    kmovd %eax, %k7
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $23, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $41, %k0, %k1
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $22, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $42, %k0, %k1
-; X32-NEXT:    kxorq %k3, %k1, %k1
-; X32-NEXT:    movzwl %bx, %eax
-; X32-NEXT:    movl %eax, %esi
-; X32-NEXT:    shrl $12, %esi
-; X32-NEXT:    kmovd %esi, %k2
-; X32-NEXT:    movl %eax, %esi
-; X32-NEXT:    shrl $14, %esi
-; X32-NEXT:    kmovd %esi, %k3
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $21, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $43, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
-; X32-NEXT:    shrl $15, %eax
-; X32-NEXT:    kmovd %eax, %k4
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $20, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $44, %k0, %k1
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $19, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $45, %k0, %k1
-; X32-NEXT:    kxorq %k5, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $18, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $46, %k0, %k1
-; X32-NEXT:    kxorq %k3, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $17, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $47, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $16, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $48, %k0, %k1
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $15, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $49, %k0, %k1
-; X32-NEXT:    kxorq %k6, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $14, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $50, %k0, %k1
-; X32-NEXT:    kxorq %k7, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $13, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $51, %k0, %k1
-; X32-NEXT:    shrb $3, %dl
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $12, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k4
-; X32-NEXT:    kshiftrq $52, %k4, %k0
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $4, %al
-; X32-NEXT:    kmovd %eax, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k5
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $5, %al
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k6
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $6, %al
-; X32-NEXT:    kmovd %eax, %k7
-; X32-NEXT:    shrb $7, %cl
-; X32-NEXT:    kmovd %ecx, %k0
-; X32-NEXT:    movl %ebx, %ecx
-; X32-NEXT:    shrl $24, %ecx
-; X32-NEXT:    kmovd %ecx, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $2, %al
-; X32-NEXT:    kmovd %eax, %k3
-; X32-NEXT:    kshiftlq $63, %k5, %k5
-; X32-NEXT:    kshiftrq $11, %k5, %k5
-; X32-NEXT:    kxorq %k5, %k4, %k4
-; X32-NEXT:    kshiftrq $53, %k4, %k5
-; X32-NEXT:    kxorq %k6, %k5, %k5
-; X32-NEXT:    kshiftlq $63, %k5, %k5
-; X32-NEXT:    kshiftrq $10, %k5, %k5
-; X32-NEXT:    kxorq %k5, %k4, %k5
-; X32-NEXT:    kshiftrq $54, %k5, %k4
-; X32-NEXT:    kxorq %k7, %k4, %k6
-; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k4
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrl $29, %eax
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k7
-; X32-NEXT:    kshiftlq $63, %k6, %k6
-; X32-NEXT:    kshiftrq $9, %k6, %k6
-; X32-NEXT:    kxorq %k6, %k5, %k5
-; X32-NEXT:    kshiftrq $55, %k5, %k6
-; X32-NEXT:    kxorq %k0, %k6, %k0
-; X32-NEXT:    kshiftlq $63, %k0, %k0
-; X32-NEXT:    kshiftrq $8, %k0, %k0
-; X32-NEXT:    kxorq %k0, %k5, %k0
-; X32-NEXT:    kshiftrq $56, %k0, %k5
-; X32-NEXT:    kxorq %k1, %k5, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrl $28, %eax
-; X32-NEXT:    kmovd %eax, %k5
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrl $30, %eax
-; X32-NEXT:    kmovd %eax, %k6
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $7, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $57, %k0, %k1
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $6, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $58, %k0, %k1
-; X32-NEXT:    kxorq %k3, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $5, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $59, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $4, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $60, %k0, %k1
-; X32-NEXT:    kxorq %k5, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $3, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $61, %k0, %k1
-; X32-NEXT:    kxorq %k7, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $2, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $62, %k0, %k1
-; X32-NEXT:    kxorq %k6, %k1, %k1
-; X32-NEXT:    shrl $31, %ebx
-; X32-NEXT:    kmovd %ebx, %k2
+; X32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
+; X32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
 ; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $1, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftlq $1, %k0, %k0
-; X32-NEXT:    kshiftrq $1, %k0, %k0
-; X32-NEXT:    kshiftlq $63, %k2, %k1
-; X32-NEXT:    korq %k1, %k0, %k1
+; X32-NEXT:    kunpckdq %k1, %k0, %k1
 ; X32-NEXT:    vpbroadcastb %eax, %zmm0 {%k1} {z}
-; X32-NEXT:    popl %esi
-; X32-NEXT:    popl %ebx
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm512_maskz_set1_epi8:
@@ -1672,537 +624,14 @@ entry:
 define i64 @test_mm512_mask_test_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) {
 ; X32-LABEL: test_mm512_mask_test_epi8_mask:
 ; X32:       # %bb.0: # %entry
-; X32-NEXT:    pushl %ebx
-; X32-NEXT:    .cfi_def_cfa_offset 8
-; X32-NEXT:    pushl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 12
-; X32-NEXT:    .cfi_offset %esi, -12
-; X32-NEXT:    .cfi_offset %ebx, -8
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    kmovd %eax, %k0
-; X32-NEXT:    kshiftrq $1, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andb $2, %cl
-; X32-NEXT:    shrb %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $62, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $2, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $61, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $3, %k0, %k1
-; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $60, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $4, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $4, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $59, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $5, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $5, %cl
-; X32-NEXT:    andb $1, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $58, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $6, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $6, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $57, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $7, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $7, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $56, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $8, %k0, %k1
-; X32-NEXT:    movb %ah, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $55, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $9, %k0, %k1
-; X32-NEXT:    andb $2, %cl
-; X32-NEXT:    shrb %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $54, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $10, %k0, %k1
-; X32-NEXT:    movb %ah, %cl
-; X32-NEXT:    movl %eax, %esi
-; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    movzwl %si, %edx
-; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    shrl $12, %ecx
-; X32-NEXT:    kmovd %ecx, %k3
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrl $13, %ecx
-; X32-NEXT:    andb $1, %cl
-; X32-NEXT:    kmovd %ecx, %k4
-; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    shrl $14, %ecx
-; X32-NEXT:    kmovd %ecx, %k5
-; X32-NEXT:    shrl $16, %eax
-; X32-NEXT:    movl %eax, %ebx
-; X32-NEXT:    andb $2, %bl
-; X32-NEXT:    shrb %bl
-; X32-NEXT:    kmovd %ebx, %k6
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %ebx
-; X32-NEXT:    shrb $2, %bl
-; X32-NEXT:    kmovd %ebx, %k7
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $53, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $11, %k0, %k1
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $52, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $12, %k0, %k1
-; X32-NEXT:    kxorq %k3, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $51, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $13, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $50, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $14, %k0, %k1
-; X32-NEXT:    kxorq %k5, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $49, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $15, %k0, %k1
-; X32-NEXT:    shrl $15, %edx
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $48, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $16, %k0, %k1
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $47, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $17, %k0, %k1
-; X32-NEXT:    kxorq %k6, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $46, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $18, %k0, %k1
-; X32-NEXT:    kxorq %k7, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $45, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $19, %k0, %k1
-; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $44, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $20, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $4, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $43, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $21, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $5, %cl
-; X32-NEXT:    andb $1, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $42, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $22, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $6, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $41, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $23, %k0, %k1
-; X32-NEXT:    shrb $7, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $40, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $24, %k0, %k1
-; X32-NEXT:    movl %esi, %edx
-; X32-NEXT:    shrl $24, %edx
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $39, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $25, %k0, %k1
-; X32-NEXT:    movl %edx, %eax
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $38, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $26, %k0, %k1
-; X32-NEXT:    andb $15, %dl
-; X32-NEXT:    movl %edx, %eax
-; X32-NEXT:    shrb $2, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $37, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $27, %k0, %k1
-; X32-NEXT:    shrb $3, %dl
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $36, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $28, %k0, %k1
-; X32-NEXT:    movl %esi, %ecx
-; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    shrl $28, %eax
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $35, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $29, %k0, %k1
-; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    shrl $29, %eax
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $34, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $30, %k0, %k1
-; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    shrl $30, %eax
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $33, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $31, %k0, %k1
-; X32-NEXT:    shrl $31, %ecx
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $32, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $32, %k0, %k1
-; X32-NEXT:    kmovd %ebx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $31, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $33, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $30, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $34, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $2, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $29, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $35, %k0, %k1
-; X32-NEXT:    shrb $3, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $28, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $36, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrb $4, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $27, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $37, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrb $5, %al
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $26, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $38, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrb $6, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $25, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $39, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrb $7, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $24, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $40, %k0, %k1
-; X32-NEXT:    movb %bh, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    movb %bh, %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $2, %cl
-; X32-NEXT:    kmovd %ecx, %k3
-; X32-NEXT:    shrb $3, %al
-; X32-NEXT:    kmovd %eax, %k4
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrl $13, %eax
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k5
-; X32-NEXT:    movl %ebx, %ecx
-; X32-NEXT:    shrl $16, %ecx
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k6
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    andb $15, %dl
-; X32-NEXT:    movl %edx, %eax
-; X32-NEXT:    shrb $2, %al
-; X32-NEXT:    kmovd %eax, %k7
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $23, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $41, %k0, %k1
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $22, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $42, %k0, %k1
-; X32-NEXT:    kxorq %k3, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $21, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $43, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $20, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $44, %k0, %k1
-; X32-NEXT:    movzwl %bx, %eax
-; X32-NEXT:    movl %eax, %esi
-; X32-NEXT:    shrl $12, %esi
-; X32-NEXT:    kmovd %esi, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $19, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $45, %k0, %k1
-; X32-NEXT:    kxorq %k5, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $18, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $46, %k0, %k1
-; X32-NEXT:    movl %eax, %esi
-; X32-NEXT:    shrl $14, %esi
-; X32-NEXT:    kmovd %esi, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $17, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $47, %k0, %k1
-; X32-NEXT:    shrl $15, %eax
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $16, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $48, %k0, %k1
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $15, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $49, %k0, %k1
-; X32-NEXT:    kxorq %k6, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $14, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $50, %k0, %k1
-; X32-NEXT:    kxorq %k7, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $13, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $51, %k0, %k1
-; X32-NEXT:    shrb $3, %dl
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $12, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $52, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $4, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $5, %al
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $6, %al
-; X32-NEXT:    kmovd %eax, %k3
-; X32-NEXT:    shrb $7, %cl
-; X32-NEXT:    kmovd %ecx, %k4
-; X32-NEXT:    movl %ebx, %ecx
-; X32-NEXT:    shrl $24, %ecx
-; X32-NEXT:    kmovd %ecx, %k5
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k6
-; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $2, %al
-; X32-NEXT:    kmovd %eax, %k7
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $11, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $53, %k0, %k1
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $10, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $54, %k0, %k1
-; X32-NEXT:    kxorq %k3, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $9, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $55, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $8, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $56, %k0, %k1
-; X32-NEXT:    kxorq %k5, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $7, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $57, %k0, %k1
-; X32-NEXT:    kxorq %k6, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $6, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $58, %k0, %k1
-; X32-NEXT:    kxorq %k7, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $5, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $59, %k0, %k1
-; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $4, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $60, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrl $28, %eax
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $3, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $61, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrl $29, %eax
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $2, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $62, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrl $30, %eax
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $1, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftlq $1, %k0, %k0
-; X32-NEXT:    kshiftrq $1, %k0, %k0
-; X32-NEXT:    shrl $31, %ebx
-; X32-NEXT:    kmovd %ebx, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    korq %k1, %k0, %k1
-; X32-NEXT:    vptestmb %zmm0, %zmm1, %k0 {%k1}
-; X32-NEXT:    kshiftrq $32, %k0, %k1
-; X32-NEXT:    kmovd %k0, %eax
-; X32-NEXT:    kmovd %k1, %edx
-; X32-NEXT:    popl %esi
-; X32-NEXT:    popl %ebx
+; X32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
+; X32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
+; X32-NEXT:    vptestmb %zmm0, %zmm1, %k2
+; X32-NEXT:    kandd %k1, %k2, %k1
+; X32-NEXT:    kmovd %k1, %eax
+; X32-NEXT:    kshiftrq $32, %k2, %k1
+; X32-NEXT:    kandd %k0, %k1, %k0
+; X32-NEXT:    kmovd %k0, %edx
 ; X32-NEXT:    vzeroupper
 ; X32-NEXT:    retl
 ;
@@ -2298,537 +727,14 @@ entry:
 define i64 @test_mm512_mask_testn_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) {
 ; X32-LABEL: test_mm512_mask_testn_epi8_mask:
 ; X32:       # %bb.0: # %entry
-; X32-NEXT:    pushl %ebx
-; X32-NEXT:    .cfi_def_cfa_offset 8
-; X32-NEXT:    pushl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 12
-; X32-NEXT:    .cfi_offset %esi, -12
-; X32-NEXT:    .cfi_offset %ebx, -8
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    kmovd %eax, %k0
-; X32-NEXT:    kshiftrq $1, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andb $2, %cl
-; X32-NEXT:    shrb %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $62, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $2, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $61, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $3, %k0, %k1
-; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $60, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $4, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $4, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $59, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $5, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $5, %cl
-; X32-NEXT:    andb $1, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $58, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $6, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $6, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $57, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $7, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $7, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $56, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $8, %k0, %k1
-; X32-NEXT:    movb %ah, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $55, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $9, %k0, %k1
-; X32-NEXT:    andb $2, %cl
-; X32-NEXT:    shrb %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $54, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $10, %k0, %k1
-; X32-NEXT:    movb %ah, %cl
-; X32-NEXT:    movl %eax, %esi
-; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    shrb $2, %dl
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    movzwl %si, %edx
-; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    shrl $12, %ecx
-; X32-NEXT:    kmovd %ecx, %k3
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrl $13, %ecx
-; X32-NEXT:    andb $1, %cl
-; X32-NEXT:    kmovd %ecx, %k4
-; X32-NEXT:    movl %edx, %ecx
-; X32-NEXT:    shrl $14, %ecx
-; X32-NEXT:    kmovd %ecx, %k5
-; X32-NEXT:    shrl $16, %eax
-; X32-NEXT:    movl %eax, %ebx
-; X32-NEXT:    andb $2, %bl
-; X32-NEXT:    shrb %bl
-; X32-NEXT:    kmovd %ebx, %k6
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %ebx
-; X32-NEXT:    shrb $2, %bl
-; X32-NEXT:    kmovd %ebx, %k7
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $53, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $11, %k0, %k1
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $52, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $12, %k0, %k1
-; X32-NEXT:    kxorq %k3, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $51, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $13, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $50, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $14, %k0, %k1
-; X32-NEXT:    kxorq %k5, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $49, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $15, %k0, %k1
-; X32-NEXT:    shrl $15, %edx
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $48, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $16, %k0, %k1
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $47, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $17, %k0, %k1
-; X32-NEXT:    kxorq %k6, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $46, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $18, %k0, %k1
-; X32-NEXT:    kxorq %k7, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $45, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $19, %k0, %k1
-; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $44, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $20, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $4, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $43, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $21, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $5, %cl
-; X32-NEXT:    andb $1, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $42, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $22, %k0, %k1
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $6, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $41, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $23, %k0, %k1
-; X32-NEXT:    shrb $7, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $40, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $24, %k0, %k1
-; X32-NEXT:    movl %esi, %edx
-; X32-NEXT:    shrl $24, %edx
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $39, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $25, %k0, %k1
-; X32-NEXT:    movl %edx, %eax
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $38, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $26, %k0, %k1
-; X32-NEXT:    andb $15, %dl
-; X32-NEXT:    movl %edx, %eax
-; X32-NEXT:    shrb $2, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $37, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $27, %k0, %k1
-; X32-NEXT:    shrb $3, %dl
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $36, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $28, %k0, %k1
-; X32-NEXT:    movl %esi, %ecx
-; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    shrl $28, %eax
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $35, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $29, %k0, %k1
-; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    shrl $29, %eax
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $34, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $30, %k0, %k1
-; X32-NEXT:    movl %esi, %eax
-; X32-NEXT:    shrl $30, %eax
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $33, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $31, %k0, %k1
-; X32-NEXT:    shrl $31, %ecx
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $32, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $32, %k0, %k1
-; X32-NEXT:    kmovd %ebx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $31, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $33, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $30, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $34, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $2, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $29, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $35, %k0, %k1
-; X32-NEXT:    shrb $3, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $28, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $36, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrb $4, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $27, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $37, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrb $5, %al
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $26, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $38, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrb $6, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $25, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $39, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrb $7, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $24, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $40, %k0, %k1
-; X32-NEXT:    movb %bh, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    movb %bh, %al
-; X32-NEXT:    andb $15, %al
-; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    shrb $2, %cl
-; X32-NEXT:    kmovd %ecx, %k3
-; X32-NEXT:    shrb $3, %al
-; X32-NEXT:    kmovd %eax, %k4
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrl $13, %eax
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k5
-; X32-NEXT:    movl %ebx, %ecx
-; X32-NEXT:    shrl $16, %ecx
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k6
-; X32-NEXT:    movl %ecx, %edx
-; X32-NEXT:    andb $15, %dl
-; X32-NEXT:    movl %edx, %eax
-; X32-NEXT:    shrb $2, %al
-; X32-NEXT:    kmovd %eax, %k7
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $23, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $41, %k0, %k1
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $22, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $42, %k0, %k1
-; X32-NEXT:    kxorq %k3, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $21, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $43, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $20, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $44, %k0, %k1
-; X32-NEXT:    movzwl %bx, %eax
-; X32-NEXT:    movl %eax, %esi
-; X32-NEXT:    shrl $12, %esi
-; X32-NEXT:    kmovd %esi, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $19, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $45, %k0, %k1
-; X32-NEXT:    kxorq %k5, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $18, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $46, %k0, %k1
-; X32-NEXT:    movl %eax, %esi
-; X32-NEXT:    shrl $14, %esi
-; X32-NEXT:    kmovd %esi, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $17, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $47, %k0, %k1
-; X32-NEXT:    shrl $15, %eax
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $16, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $48, %k0, %k1
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $15, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $49, %k0, %k1
-; X32-NEXT:    kxorq %k6, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $14, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $50, %k0, %k1
-; X32-NEXT:    kxorq %k7, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $13, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $51, %k0, %k1
-; X32-NEXT:    shrb $3, %dl
-; X32-NEXT:    kmovd %edx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $12, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $52, %k0, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $4, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $5, %al
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $6, %al
-; X32-NEXT:    kmovd %eax, %k3
-; X32-NEXT:    shrb $7, %cl
-; X32-NEXT:    kmovd %ecx, %k4
-; X32-NEXT:    movl %ebx, %ecx
-; X32-NEXT:    shrl $24, %ecx
-; X32-NEXT:    kmovd %ecx, %k5
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    andb $2, %al
-; X32-NEXT:    shrb %al
-; X32-NEXT:    kmovd %eax, %k6
-; X32-NEXT:    andb $15, %cl
-; X32-NEXT:    movl %ecx, %eax
-; X32-NEXT:    shrb $2, %al
-; X32-NEXT:    kmovd %eax, %k7
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $11, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $53, %k0, %k1
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $10, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $54, %k0, %k1
-; X32-NEXT:    kxorq %k3, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $9, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $55, %k0, %k1
-; X32-NEXT:    kxorq %k4, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $8, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $56, %k0, %k1
-; X32-NEXT:    kxorq %k5, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $7, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $57, %k0, %k1
-; X32-NEXT:    kxorq %k6, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $6, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $58, %k0, %k1
-; X32-NEXT:    kxorq %k7, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $5, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $59, %k0, %k1
-; X32-NEXT:    shrb $3, %cl
-; X32-NEXT:    kmovd %ecx, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $4, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $60, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrl $28, %eax
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $3, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $61, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrl $29, %eax
-; X32-NEXT:    andb $1, %al
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $2, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftrq $62, %k0, %k1
-; X32-NEXT:    movl %ebx, %eax
-; X32-NEXT:    shrl $30, %eax
-; X32-NEXT:    kmovd %eax, %k2
-; X32-NEXT:    kxorq %k2, %k1, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    kshiftrq $1, %k1, %k1
-; X32-NEXT:    kxorq %k1, %k0, %k0
-; X32-NEXT:    kshiftlq $1, %k0, %k0
-; X32-NEXT:    kshiftrq $1, %k0, %k0
-; X32-NEXT:    shrl $31, %ebx
-; X32-NEXT:    kmovd %ebx, %k1
-; X32-NEXT:    kshiftlq $63, %k1, %k1
-; X32-NEXT:    korq %k1, %k0, %k1
-; X32-NEXT:    vptestnmb %zmm0, %zmm1, %k0 {%k1}
-; X32-NEXT:    kshiftrq $32, %k0, %k1
-; X32-NEXT:    kmovd %k0, %eax
-; X32-NEXT:    kmovd %k1, %edx
-; X32-NEXT:    popl %esi
-; X32-NEXT:    popl %ebx
+; X32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
+; X32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
+; X32-NEXT:    vptestnmb %zmm0, %zmm1, %k2
+; X32-NEXT:    kandd %k1, %k2, %k1
+; X32-NEXT:    kmovd %k1, %eax
+; X32-NEXT:    kshiftrq $32, %k2, %k1
+; X32-NEXT:    kandd %k0, %k1, %k0
+; X32-NEXT:    kmovd %k0, %edx
 ; X32-NEXT:    vzeroupper
 ; X32-NEXT:    retl
 ;

Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll?rev=324057&r1=324056&r2=324057&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll Thu Feb  1 21:59:33 2018
@@ -1805,569 +1805,58 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    .cfi_offset %edi, -16
 ; AVX512F-32-NEXT:    .cfi_offset %ebx, -12
 ; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
+; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrl $16, %ecx
-; AVX512F-32-NEXT:    movl %ecx, %esi
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    andb $15, %cl
-; AVX512F-32-NEXT:    movl %eax, %edx
-; AVX512F-32-NEXT:    andb $2, %dl
-; AVX512F-32-NEXT:    shrb %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k1
-; AVX512F-32-NEXT:    movl %ecx, %ebx
-; AVX512F-32-NEXT:    shrb $2, %cl
-; AVX512F-32-NEXT:    kmovd %ecx, %k2
-; AVX512F-32-NEXT:    movb %ah, %dl
-; AVX512F-32-NEXT:    andb $15, %dl
-; AVX512F-32-NEXT:    shrb $3, %bl
-; AVX512F-32-NEXT:    kmovd %ebx, %k0
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrb $4, %cl
-; AVX512F-32-NEXT:    kmovd %ecx, %k3
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrb $5, %cl
-; AVX512F-32-NEXT:    andb $1, %cl
-; AVX512F-32-NEXT:    kmovd %ecx, %k4
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrb $6, %cl
-; AVX512F-32-NEXT:    kmovd %ecx, %k6
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrb $7, %cl
-; AVX512F-32-NEXT:    kmovd %eax, %k5
-; AVX512F-32-NEXT:    kshiftrq $1, %k5, %k7
-; AVX512F-32-NEXT:    kxorq %k1, %k7, %k1
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $62, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k5, %k7
-; AVX512F-32-NEXT:    kshiftrq $2, %k7, %k1
-; AVX512F-32-NEXT:    kxorq %k2, %k1, %k2
-; AVX512F-32-NEXT:    kmovd %ecx, %k5
-; AVX512F-32-NEXT:    movb %ah, %cl
-; AVX512F-32-NEXT:    kmovd %ecx, %k1
-; AVX512F-32-NEXT:    movl %eax, %ebp
-; AVX512F-32-NEXT:    andb $2, %cl
-; AVX512F-32-NEXT:    shrb %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $61, %k2, %k2
-; AVX512F-32-NEXT:    kxorq %k2, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $3, %k7, %k2
-; AVX512F-32-NEXT:    kxorq %k0, %k2, %k0
-; AVX512F-32-NEXT:    kmovd %ecx, %k2
-; AVX512F-32-NEXT:    movl %edx, %ecx
-; AVX512F-32-NEXT:    shrb $2, %dl
-; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $60, %k0, %k0
-; AVX512F-32-NEXT:    kxorq %k0, %k7, %k0
-; AVX512F-32-NEXT:    kshiftrq $4, %k0, %k7
-; AVX512F-32-NEXT:    kxorq %k3, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %edx, %k3
-; AVX512F-32-NEXT:    movl %esi, %eax
-; AVX512F-32-NEXT:    movl %eax, %edx
-; AVX512F-32-NEXT:    andb $15, %dl
-; AVX512F-32-NEXT:    shrb $3, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $59, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k0, %k7
-; AVX512F-32-NEXT:    kshiftrq $5, %k7, %k0
-; AVX512F-32-NEXT:    kxorq %k4, %k0, %k4
-; AVX512F-32-NEXT:    kmovd %ecx, %k0
-; AVX512F-32-NEXT:    movl %ebp, %ecx
-; AVX512F-32-NEXT:    shrl $13, %ecx
-; AVX512F-32-NEXT:    andb $1, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
-; AVX512F-32-NEXT:    kshiftrq $58, %k4, %k4
-; AVX512F-32-NEXT:    kxorq %k4, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $6, %k7, %k4
-; AVX512F-32-NEXT:    kxorq %k6, %k4, %k6
-; AVX512F-32-NEXT:    kmovd %ecx, %k4
-; AVX512F-32-NEXT:    movl %eax, %ebx
-; AVX512F-32-NEXT:    andb $2, %bl
-; AVX512F-32-NEXT:    shrb %bl
-; AVX512F-32-NEXT:    kshiftlq $63, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $57, %k6, %k6
-; AVX512F-32-NEXT:    kxorq %k6, %k7, %k6
-; AVX512F-32-NEXT:    kshiftrq $7, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k5, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %ebx, %k5
-; AVX512F-32-NEXT:    movl %edx, %ecx
-; AVX512F-32-NEXT:    shrb $2, %dl
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $56, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k6, %k7
-; AVX512F-32-NEXT:    kshiftrq $8, %k7, %k6
-; AVX512F-32-NEXT:    kxorq %k1, %k6, %k1
-; AVX512F-32-NEXT:    kmovd %edx, %k6
-; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; AVX512F-32-NEXT:    shrb $3, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $55, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $9, %k7, %k1
-; AVX512F-32-NEXT:    kxorq %k2, %k1, %k2
-; AVX512F-32-NEXT:    kmovd %ecx, %k1
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrb $4, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $54, %k2, %k2
-; AVX512F-32-NEXT:    kxorq %k2, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $10, %k7, %k2
-; AVX512F-32-NEXT:    kxorq %k3, %k2, %k3
-; AVX512F-32-NEXT:    kmovd %ecx, %k2
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrb $5, %cl
-; AVX512F-32-NEXT:    andb $1, %cl
-; AVX512F-32-NEXT:    movzwl %bp, %edx
-; AVX512F-32-NEXT:    movl %edx, %esi
-; AVX512F-32-NEXT:    movl %edx, %edi
-; AVX512F-32-NEXT:    shrl $12, %edx
-; AVX512F-32-NEXT:    kshiftlq $63, %k3, %k3
-; AVX512F-32-NEXT:    kshiftrq $53, %k3, %k3
-; AVX512F-32-NEXT:    kxorq %k3, %k7, %k3
-; AVX512F-32-NEXT:    kshiftrq $11, %k3, %k7
-; AVX512F-32-NEXT:    kxorq %k0, %k7, %k0
-; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $52, %k0, %k0
-; AVX512F-32-NEXT:    kxorq %k0, %k3, %k3
-; AVX512F-32-NEXT:    kshiftrq $12, %k3, %k0
-; AVX512F-32-NEXT:    kmovd %edx, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k0, %k7
-; AVX512F-32-NEXT:    kmovd %ecx, %k0
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrb $6, %cl
-; AVX512F-32-NEXT:    shrl $15, %esi
-; AVX512F-32-NEXT:    shrl $14, %edi
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $51, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k3, %k3
-; AVX512F-32-NEXT:    kshiftrq $13, %k3, %k7
-; AVX512F-32-NEXT:    kxorq %k4, %k7, %k4
-; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
-; AVX512F-32-NEXT:    kshiftrq $50, %k4, %k4
-; AVX512F-32-NEXT:    kxorq %k4, %k3, %k3
-; AVX512F-32-NEXT:    kshiftrq $14, %k3, %k4
-; AVX512F-32-NEXT:    kmovd %edi, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k4, %k4
-; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
-; AVX512F-32-NEXT:    kshiftrq $49, %k4, %k4
-; AVX512F-32-NEXT:    kxorq %k4, %k3, %k3
-; AVX512F-32-NEXT:    kshiftrq $15, %k3, %k4
-; AVX512F-32-NEXT:    kmovd %esi, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k4, %k4
-; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
-; AVX512F-32-NEXT:    kshiftrq $48, %k4, %k4
-; AVX512F-32-NEXT:    kxorq %k4, %k3, %k3
-; AVX512F-32-NEXT:    kshiftrq $16, %k3, %k4
-; AVX512F-32-NEXT:    kmovd %eax, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k4, %k4
-; AVX512F-32-NEXT:    kmovd %ecx, %k7
-; AVX512F-32-NEXT:    movl %ebp, %ecx
-; AVX512F-32-NEXT:    shrl $24, %ecx
-; AVX512F-32-NEXT:    # kill: def $al killed $al killed $eax def $eax
-; AVX512F-32-NEXT:    shrb $7, %al
-; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
-; AVX512F-32-NEXT:    kshiftrq $47, %k4, %k4
-; AVX512F-32-NEXT:    kxorq %k4, %k3, %k3
-; AVX512F-32-NEXT:    kshiftrq $17, %k3, %k4
-; AVX512F-32-NEXT:    kxorq %k5, %k4, %k4
-; AVX512F-32-NEXT:    kmovd %eax, %k5
-; AVX512F-32-NEXT:    movl %ecx, %eax
-; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
-; AVX512F-32-NEXT:    kshiftrq $46, %k4, %k4
-; AVX512F-32-NEXT:    kxorq %k4, %k3, %k4
-; AVX512F-32-NEXT:    kshiftrq $18, %k4, %k3
-; AVX512F-32-NEXT:    kxorq %k6, %k3, %k6
-; AVX512F-32-NEXT:    kmovd %ecx, %k3
-; AVX512F-32-NEXT:    # kill: def $cl killed $cl killed $ecx def $ecx
-; AVX512F-32-NEXT:    andb $15, %cl
-; AVX512F-32-NEXT:    andb $2, %al
-; AVX512F-32-NEXT:    shrb %al
-; AVX512F-32-NEXT:    kshiftlq $63, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $45, %k6, %k6
-; AVX512F-32-NEXT:    kxorq %k6, %k4, %k6
-; AVX512F-32-NEXT:    kshiftrq $19, %k6, %k4
-; AVX512F-32-NEXT:    kxorq %k1, %k4, %k1
-; AVX512F-32-NEXT:    kmovd %eax, %k4
-; AVX512F-32-NEXT:    movl %ecx, %edx
-; AVX512F-32-NEXT:    shrb $2, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $44, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k6, %k1
-; AVX512F-32-NEXT:    kshiftrq $20, %k1, %k6
-; AVX512F-32-NEXT:    kxorq %k2, %k6, %k6
-; AVX512F-32-NEXT:    kmovd %ecx, %k2
-; AVX512F-32-NEXT:    movl %ebx, %eax
-; AVX512F-32-NEXT:    andb $15, %al
-; AVX512F-32-NEXT:    shrb $3, %dl
-; AVX512F-32-NEXT:    kshiftlq $63, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $43, %k6, %k6
-; AVX512F-32-NEXT:    kxorq %k6, %k1, %k6
-; AVX512F-32-NEXT:    kshiftrq $21, %k6, %k1
-; AVX512F-32-NEXT:    kxorq %k0, %k1, %k0
-; AVX512F-32-NEXT:    kmovd %edx, %k1
-; AVX512F-32-NEXT:    movl %ebp, %ecx
-; AVX512F-32-NEXT:    shrl $29, %ecx
-; AVX512F-32-NEXT:    andb $1, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $42, %k0, %k0
-; AVX512F-32-NEXT:    kxorq %k0, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $22, %k6, %k0
-; AVX512F-32-NEXT:    kxorq %k7, %k0, %k7
-; AVX512F-32-NEXT:    kmovd %ecx, %k0
-; AVX512F-32-NEXT:    movl %ebx, %ecx
-; AVX512F-32-NEXT:    andb $2, %cl
-; AVX512F-32-NEXT:    shrb %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $41, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $23, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k5, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %ecx, %k5
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrb $2, %al
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $40, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k6, %k7
-; AVX512F-32-NEXT:    kshiftrq $24, %k7, %k6
-; AVX512F-32-NEXT:    kxorq %k3, %k6, %k3
-; AVX512F-32-NEXT:    kmovd %eax, %k6
-; AVX512F-32-NEXT:    movb %bh, %al
-; AVX512F-32-NEXT:    andb $15, %al
-; AVX512F-32-NEXT:    shrb $3, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k3, %k3
-; AVX512F-32-NEXT:    kshiftrq $39, %k3, %k3
-; AVX512F-32-NEXT:    kxorq %k3, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $25, %k7, %k3
-; AVX512F-32-NEXT:    kxorq %k4, %k3, %k4
-; AVX512F-32-NEXT:    kmovd %ecx, %k3
-; AVX512F-32-NEXT:    movl %ebx, %ecx
-; AVX512F-32-NEXT:    shrb $4, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
-; AVX512F-32-NEXT:    kshiftrq $38, %k4, %k4
-; AVX512F-32-NEXT:    kxorq %k4, %k7, %k4
-; AVX512F-32-NEXT:    kshiftrq $26, %k4, %k7
-; AVX512F-32-NEXT:    kxorq %k2, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %ecx, %k2
-; AVX512F-32-NEXT:    movl %ebx, %ecx
-; AVX512F-32-NEXT:    shrb $5, %cl
-; AVX512F-32-NEXT:    andb $1, %cl
-; AVX512F-32-NEXT:    movl %ebp, %edx
-; AVX512F-32-NEXT:    shrl $28, %edx
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $37, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k4, %k4
-; AVX512F-32-NEXT:    kshiftrq $27, %k4, %k7
-; AVX512F-32-NEXT:    kxorq %k1, %k7, %k1
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $36, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k4, %k1
-; AVX512F-32-NEXT:    kshiftrq $28, %k1, %k4
-; AVX512F-32-NEXT:    kmovd %edx, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k4, %k7
-; AVX512F-32-NEXT:    kmovd %ecx, %k4
-; AVX512F-32-NEXT:    movl %ebx, %ecx
-; AVX512F-32-NEXT:    shrb $6, %cl
-; AVX512F-32-NEXT:    movl %ebp, %edx
-; AVX512F-32-NEXT:    shrl $31, %edx
-; AVX512F-32-NEXT:    movl %ebp, %esi
-; AVX512F-32-NEXT:    shrl $30, %esi
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $35, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $29, %k1, %k7
-; AVX512F-32-NEXT:    kxorq %k0, %k7, %k0
-; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $34, %k0, %k0
-; AVX512F-32-NEXT:    kxorq %k0, %k1, %k0
-; AVX512F-32-NEXT:    kshiftrq $30, %k0, %k1
-; AVX512F-32-NEXT:    kmovd %esi, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k1, %k1
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $33, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $31, %k0, %k1
-; AVX512F-32-NEXT:    kmovd %edx, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k1, %k1
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $32, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $32, %k0, %k1
-; AVX512F-32-NEXT:    kmovd %ebx, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k1, %k1
-; AVX512F-32-NEXT:    kmovd %ecx, %k7
-; AVX512F-32-NEXT:    movl %ebx, %ecx
-; AVX512F-32-NEXT:    shrb $7, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $31, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $33, %k0, %k1
-; AVX512F-32-NEXT:    kxorq %k5, %k1, %k1
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $30, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $34, %k0, %k1
-; AVX512F-32-NEXT:    kxorq %k6, %k1, %k5
-; AVX512F-32-NEXT:    kmovd %ecx, %k6
-; AVX512F-32-NEXT:    movb %bh, %cl
-; AVX512F-32-NEXT:    kmovd %ecx, %k1
-; AVX512F-32-NEXT:    andb $2, %cl
-; AVX512F-32-NEXT:    shrb %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k5, %k5
-; AVX512F-32-NEXT:    kshiftrq $29, %k5, %k5
-; AVX512F-32-NEXT:    kxorq %k5, %k0, %k5
-; AVX512F-32-NEXT:    kshiftrq $35, %k5, %k0
-; AVX512F-32-NEXT:    kxorq %k3, %k0, %k3
-; AVX512F-32-NEXT:    kmovd %ecx, %k0
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrb $2, %al
-; AVX512F-32-NEXT:    kshiftlq $63, %k3, %k3
-; AVX512F-32-NEXT:    kshiftrq $28, %k3, %k3
-; AVX512F-32-NEXT:    kxorq %k3, %k5, %k5
-; AVX512F-32-NEXT:    kshiftrq $36, %k5, %k3
-; AVX512F-32-NEXT:    kxorq %k2, %k3, %k2
-; AVX512F-32-NEXT:    kmovd %eax, %k3
-; AVX512F-32-NEXT:    movl %ebx, %eax
-; AVX512F-32-NEXT:    shrl $16, %eax
-; AVX512F-32-NEXT:    shrb $3, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $27, %k2, %k2
-; AVX512F-32-NEXT:    kxorq %k2, %k5, %k2
-; AVX512F-32-NEXT:    kshiftrq $37, %k2, %k5
-; AVX512F-32-NEXT:    kxorq %k4, %k5, %k4
-; AVX512F-32-NEXT:    kmovd %ecx, %k5
-; AVX512F-32-NEXT:    movl %ebx, %ecx
-; AVX512F-32-NEXT:    shrl $13, %ecx
-; AVX512F-32-NEXT:    andb $1, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
-; AVX512F-32-NEXT:    kshiftrq $26, %k4, %k4
-; AVX512F-32-NEXT:    kxorq %k4, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $38, %k2, %k4
-; AVX512F-32-NEXT:    kxorq %k7, %k4, %k7
-; AVX512F-32-NEXT:    kmovd %ecx, %k4
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    andb $2, %cl
-; AVX512F-32-NEXT:    shrb %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $25, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k2, %k7
-; AVX512F-32-NEXT:    kshiftrq $39, %k7, %k2
-; AVX512F-32-NEXT:    kxorq %k6, %k2, %k6
-; AVX512F-32-NEXT:    kmovd %ecx, %k2
-; AVX512F-32-NEXT:    movl %eax, %edx
-; AVX512F-32-NEXT:    andb $15, %dl
-; AVX512F-32-NEXT:    movl %edx, %ecx
-; AVX512F-32-NEXT:    shrb $2, %dl
-; AVX512F-32-NEXT:    kshiftlq $63, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $24, %k6, %k6
-; AVX512F-32-NEXT:    kxorq %k6, %k7, %k6
-; AVX512F-32-NEXT:    kshiftrq $40, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k1, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %edx, %k1
-; AVX512F-32-NEXT:    movzwl %bx, %esi
-; AVX512F-32-NEXT:    movl %esi, %edx
-; AVX512F-32-NEXT:    movl %esi, %edi
-; AVX512F-32-NEXT:    shrl $12, %esi
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $23, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $41, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k0, %k7, %k0
-; AVX512F-32-NEXT:    kmovd %esi, %k7
-; AVX512F-32-NEXT:    shrl $14, %edi
-; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $22, %k0, %k0
-; AVX512F-32-NEXT:    kxorq %k0, %k6, %k0
-; AVX512F-32-NEXT:    kshiftrq $42, %k0, %k6
-; AVX512F-32-NEXT:    kxorq %k3, %k6, %k3
-; AVX512F-32-NEXT:    kmovd %edi, %k6
-; AVX512F-32-NEXT:    shrb $3, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k3, %k3
-; AVX512F-32-NEXT:    kshiftrq $21, %k3, %k3
-; AVX512F-32-NEXT:    kxorq %k3, %k0, %k3
-; AVX512F-32-NEXT:    kshiftrq $43, %k3, %k0
-; AVX512F-32-NEXT:    kxorq %k5, %k0, %k5
-; AVX512F-32-NEXT:    kmovd %ecx, %k0
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrb $4, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k5, %k5
-; AVX512F-32-NEXT:    kshiftrq $20, %k5, %k5
-; AVX512F-32-NEXT:    kxorq %k5, %k3, %k5
-; AVX512F-32-NEXT:    kshiftrq $44, %k5, %k3
-; AVX512F-32-NEXT:    kxorq %k7, %k3, %k7
-; AVX512F-32-NEXT:    kmovd %ecx, %k3
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrb $5, %cl
-; AVX512F-32-NEXT:    andb $1, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $19, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k5, %k5
-; AVX512F-32-NEXT:    kshiftrq $45, %k5, %k7
-; AVX512F-32-NEXT:    kxorq %k4, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %ecx, %k4
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrb $6, %cl
-; AVX512F-32-NEXT:    shrl $15, %edx
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $18, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k5, %k5
-; AVX512F-32-NEXT:    kshiftrq $46, %k5, %k7
-; AVX512F-32-NEXT:    kxorq %k6, %k7, %k6
-; AVX512F-32-NEXT:    kshiftlq $63, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $17, %k6, %k6
-; AVX512F-32-NEXT:    kxorq %k6, %k5, %k5
-; AVX512F-32-NEXT:    kshiftrq $47, %k5, %k6
-; AVX512F-32-NEXT:    kmovd %edx, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k6, %k6
-; AVX512F-32-NEXT:    kshiftlq $63, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $16, %k6, %k6
-; AVX512F-32-NEXT:    kxorq %k6, %k5, %k6
-; AVX512F-32-NEXT:    kshiftrq $48, %k6, %k5
-; AVX512F-32-NEXT:    kmovd %eax, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k5, %k7
-; AVX512F-32-NEXT:    kmovd %ecx, %k5
-; AVX512F-32-NEXT:    movl %ebx, %ecx
-; AVX512F-32-NEXT:    shrl $24, %ecx
-; AVX512F-32-NEXT:    # kill: def $al killed $al killed $eax def $eax
-; AVX512F-32-NEXT:    shrb $7, %al
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $15, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $49, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k2, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %eax, %k2
-; AVX512F-32-NEXT:    movl %ecx, %eax
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $14, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $50, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k1, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %ecx, %k1
-; AVX512F-32-NEXT:    # kill: def $cl killed $cl killed $ecx def $ecx
-; AVX512F-32-NEXT:    andb $15, %cl
-; AVX512F-32-NEXT:    andb $2, %al
-; AVX512F-32-NEXT:    shrb %al
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $13, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $51, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k0, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %eax, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
-; AVX512F-32-NEXT:    shrb $2, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $12, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $52, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k3, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %ecx, %k3
-; AVX512F-32-NEXT:    shrb $3, %al
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $11, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $53, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k4, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %eax, %k4
-; AVX512F-32-NEXT:    movl %ebx, %eax
-; AVX512F-32-NEXT:    shrl $29, %eax
-; AVX512F-32-NEXT:    andb $1, %al
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $10, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $54, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k5, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %eax, %k5
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $9, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $55, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k2, %k7, %k2
-; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $8, %k2, %k2
-; AVX512F-32-NEXT:    kxorq %k2, %k6, %k2
-; AVX512F-32-NEXT:    kshiftrq $56, %k2, %k6
-; AVX512F-32-NEXT:    kxorq %k1, %k6, %k1
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $7, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k2, %k1
-; AVX512F-32-NEXT:    kshiftrq $57, %k1, %k2
-; AVX512F-32-NEXT:    kxorq %k0, %k2, %k0
-; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $6, %k0, %k0
-; AVX512F-32-NEXT:    kxorq %k0, %k1, %k0
-; AVX512F-32-NEXT:    kshiftrq $58, %k0, %k1
-; AVX512F-32-NEXT:    kxorq %k3, %k1, %k1
-; AVX512F-32-NEXT:    movl %ebx, %eax
-; AVX512F-32-NEXT:    shrl $28, %eax
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $5, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $59, %k0, %k1
-; AVX512F-32-NEXT:    kxorq %k4, %k1, %k1
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $4, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $60, %k0, %k1
-; AVX512F-32-NEXT:    kmovd %eax, %k2
-; AVX512F-32-NEXT:    kxorq %k2, %k1, %k1
-; AVX512F-32-NEXT:    movl %ebx, %eax
-; AVX512F-32-NEXT:    shrl $31, %eax
-; AVX512F-32-NEXT:    movl %ebx, %ecx
-; AVX512F-32-NEXT:    shrl $30, %ecx
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $3, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $61, %k0, %k1
-; AVX512F-32-NEXT:    kxorq %k5, %k1, %k1
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $2, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $62, %k0, %k1
-; AVX512F-32-NEXT:    kmovd %ecx, %k2
-; AVX512F-32-NEXT:    kxorq %k2, %k1, %k1
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $1, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k0, %k0
-; AVX512F-32-NEXT:    kshiftlq $1, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $1, %k0, %k0
+; AVX512F-32-NEXT:    vpcmpeqb %zmm1, %zmm0, %k2
+; AVX512F-32-NEXT:    kmovd %esi, %k0
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    korq %k1, %k0, %k1
-; AVX512F-32-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
-; AVX512F-32-NEXT:    kshiftrq $32, %k0, %k2
-; AVX512F-32-NEXT:    kmovd %k2, %eax
-; AVX512F-32-NEXT:    kmovd %k0, %ecx
-; AVX512F-32-NEXT:    vpcmpgtb %zmm0, %zmm1, %k0 {%k1}
-; AVX512F-32-NEXT:    kshiftrq $32, %k0, %k2
-; AVX512F-32-NEXT:    kmovd %k0, %edx
-; AVX512F-32-NEXT:    addl %ecx, %edx
-; AVX512F-32-NEXT:    kmovd %k2, %ecx
-; AVX512F-32-NEXT:    adcl %eax, %ecx
-; AVX512F-32-NEXT:    vpcmpleb %zmm1, %zmm0, %k0 {%k1}
-; AVX512F-32-NEXT:    kshiftrq $32, %k0, %k2
-; AVX512F-32-NEXT:    kmovd %k0, %eax
-; AVX512F-32-NEXT:    addl %edx, %eax
+; AVX512F-32-NEXT:    kshiftrq $32, %k2, %k3
+; AVX512F-32-NEXT:    kandd %k1, %k3, %k3
+; AVX512F-32-NEXT:    kmovd %k3, %eax
+; AVX512F-32-NEXT:    kandd %k0, %k2, %k2
 ; AVX512F-32-NEXT:    kmovd %k2, %edx
-; AVX512F-32-NEXT:    adcl %ecx, %edx
-; AVX512F-32-NEXT:    vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
-; AVX512F-32-NEXT:    kshiftrq $32, %k0, %k2
-; AVX512F-32-NEXT:    kmovd %k0, %ecx
-; AVX512F-32-NEXT:    addl %eax, %ecx
-; AVX512F-32-NEXT:    kmovd %k2, %eax
-; AVX512F-32-NEXT:    adcl %edx, %eax
-; AVX512F-32-NEXT:    vpcmpleb %zmm0, %zmm1, %k0 {%k1}
-; AVX512F-32-NEXT:    kshiftrq $32, %k0, %k2
-; AVX512F-32-NEXT:    kmovd %k0, %edx
-; AVX512F-32-NEXT:    addl %ecx, %edx
+; AVX512F-32-NEXT:    vpcmpgtb %zmm0, %zmm1, %k2
+; AVX512F-32-NEXT:    kshiftrq $32, %k2, %k3
+; AVX512F-32-NEXT:    kandd %k1, %k3, %k3
+; AVX512F-32-NEXT:    kmovd %k3, %edi
+; AVX512F-32-NEXT:    kandd %k0, %k2, %k2
+; AVX512F-32-NEXT:    kmovd %k2, %ebx
+; AVX512F-32-NEXT:    addl %edx, %ebx
+; AVX512F-32-NEXT:    adcl %eax, %edi
+; AVX512F-32-NEXT:    vpcmpleb %zmm1, %zmm0, %k2
+; AVX512F-32-NEXT:    kshiftrq $32, %k2, %k3
+; AVX512F-32-NEXT:    kandd %k1, %k3, %k3
+; AVX512F-32-NEXT:    kmovd %k3, %eax
+; AVX512F-32-NEXT:    kandd %k0, %k2, %k2
+; AVX512F-32-NEXT:    kmovd %k2, %edx
+; AVX512F-32-NEXT:    addl %ebx, %edx
+; AVX512F-32-NEXT:    adcl %edi, %eax
+; AVX512F-32-NEXT:    vpcmpneqb %zmm1, %zmm0, %k2
+; AVX512F-32-NEXT:    kshiftrq $32, %k2, %k3
+; AVX512F-32-NEXT:    kandd %k1, %k3, %k3
+; AVX512F-32-NEXT:    kmovd %k3, %edi
+; AVX512F-32-NEXT:    kandd %k0, %k2, %k2
+; AVX512F-32-NEXT:    kmovd %k2, %ebx
+; AVX512F-32-NEXT:    addl %edx, %ebx
+; AVX512F-32-NEXT:    adcl %eax, %edi
+; AVX512F-32-NEXT:    vpcmpleb %zmm0, %zmm1, %k2
+; AVX512F-32-NEXT:    kshiftrq $32, %k2, %k3
+; AVX512F-32-NEXT:    kandd %k1, %k3, %k3
+; AVX512F-32-NEXT:    kmovd %k3, %ebp
+; AVX512F-32-NEXT:    kandd %k0, %k2, %k2
 ; AVX512F-32-NEXT:    kmovd %k2, %ecx
-; AVX512F-32-NEXT:    adcl %eax, %ecx
-; AVX512F-32-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
-; AVX512F-32-NEXT:    kshiftrq $32, %k0, %k1
-; AVX512F-32-NEXT:    kmovd %k0, %eax
-; AVX512F-32-NEXT:    addl %edx, %eax
+; AVX512F-32-NEXT:    addl %ebx, %ecx
+; AVX512F-32-NEXT:    adcl %edi, %ebp
+; AVX512F-32-NEXT:    vpcmpgtb %zmm1, %zmm0, %k2
+; AVX512F-32-NEXT:    kshiftrq $32, %k2, %k3
+; AVX512F-32-NEXT:    kandd %k1, %k3, %k1
 ; AVX512F-32-NEXT:    kmovd %k1, %edx
-; AVX512F-32-NEXT:    adcl %ecx, %edx
-; AVX512F-32-NEXT:    addl %ebp, %eax
-; AVX512F-32-NEXT:    adcl %ebx, %edx
+; AVX512F-32-NEXT:    kandd %k0, %k2, %k0
+; AVX512F-32-NEXT:    kmovd %k0, %eax
+; AVX512F-32-NEXT:    addl %ecx, %eax
+; AVX512F-32-NEXT:    adcl %ebp, %edx
+; AVX512F-32-NEXT:    addl %esi, %eax
+; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    popl %esi
 ; AVX512F-32-NEXT:    popl %edi
 ; AVX512F-32-NEXT:    popl %ebx
@@ -2527,569 +2016,58 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    .cfi_offset %edi, -16
 ; AVX512F-32-NEXT:    .cfi_offset %ebx, -12
 ; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
+; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrl $16, %ecx
-; AVX512F-32-NEXT:    movl %ecx, %esi
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    andb $15, %cl
-; AVX512F-32-NEXT:    movl %eax, %edx
-; AVX512F-32-NEXT:    andb $2, %dl
-; AVX512F-32-NEXT:    shrb %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k1
-; AVX512F-32-NEXT:    movl %ecx, %ebx
-; AVX512F-32-NEXT:    shrb $2, %cl
-; AVX512F-32-NEXT:    kmovd %ecx, %k2
-; AVX512F-32-NEXT:    movb %ah, %dl
-; AVX512F-32-NEXT:    andb $15, %dl
-; AVX512F-32-NEXT:    shrb $3, %bl
-; AVX512F-32-NEXT:    kmovd %ebx, %k0
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrb $4, %cl
-; AVX512F-32-NEXT:    kmovd %ecx, %k3
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrb $5, %cl
-; AVX512F-32-NEXT:    andb $1, %cl
-; AVX512F-32-NEXT:    kmovd %ecx, %k4
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrb $6, %cl
-; AVX512F-32-NEXT:    kmovd %ecx, %k6
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrb $7, %cl
-; AVX512F-32-NEXT:    kmovd %eax, %k5
-; AVX512F-32-NEXT:    kshiftrq $1, %k5, %k7
-; AVX512F-32-NEXT:    kxorq %k1, %k7, %k1
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $62, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k5, %k7
-; AVX512F-32-NEXT:    kshiftrq $2, %k7, %k1
-; AVX512F-32-NEXT:    kxorq %k2, %k1, %k2
-; AVX512F-32-NEXT:    kmovd %ecx, %k5
-; AVX512F-32-NEXT:    movb %ah, %cl
-; AVX512F-32-NEXT:    kmovd %ecx, %k1
-; AVX512F-32-NEXT:    movl %eax, %ebp
-; AVX512F-32-NEXT:    andb $2, %cl
-; AVX512F-32-NEXT:    shrb %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $61, %k2, %k2
-; AVX512F-32-NEXT:    kxorq %k2, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $3, %k7, %k2
-; AVX512F-32-NEXT:    kxorq %k0, %k2, %k0
-; AVX512F-32-NEXT:    kmovd %ecx, %k2
-; AVX512F-32-NEXT:    movl %edx, %ecx
-; AVX512F-32-NEXT:    shrb $2, %dl
-; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $60, %k0, %k0
-; AVX512F-32-NEXT:    kxorq %k0, %k7, %k0
-; AVX512F-32-NEXT:    kshiftrq $4, %k0, %k7
-; AVX512F-32-NEXT:    kxorq %k3, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %edx, %k3
-; AVX512F-32-NEXT:    movl %esi, %eax
-; AVX512F-32-NEXT:    movl %eax, %edx
-; AVX512F-32-NEXT:    andb $15, %dl
-; AVX512F-32-NEXT:    shrb $3, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $59, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k0, %k7
-; AVX512F-32-NEXT:    kshiftrq $5, %k7, %k0
-; AVX512F-32-NEXT:    kxorq %k4, %k0, %k4
-; AVX512F-32-NEXT:    kmovd %ecx, %k0
-; AVX512F-32-NEXT:    movl %ebp, %ecx
-; AVX512F-32-NEXT:    shrl $13, %ecx
-; AVX512F-32-NEXT:    andb $1, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
-; AVX512F-32-NEXT:    kshiftrq $58, %k4, %k4
-; AVX512F-32-NEXT:    kxorq %k4, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $6, %k7, %k4
-; AVX512F-32-NEXT:    kxorq %k6, %k4, %k6
-; AVX512F-32-NEXT:    kmovd %ecx, %k4
-; AVX512F-32-NEXT:    movl %eax, %ebx
-; AVX512F-32-NEXT:    andb $2, %bl
-; AVX512F-32-NEXT:    shrb %bl
-; AVX512F-32-NEXT:    kshiftlq $63, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $57, %k6, %k6
-; AVX512F-32-NEXT:    kxorq %k6, %k7, %k6
-; AVX512F-32-NEXT:    kshiftrq $7, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k5, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %ebx, %k5
-; AVX512F-32-NEXT:    movl %edx, %ecx
-; AVX512F-32-NEXT:    shrb $2, %dl
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $56, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k6, %k7
-; AVX512F-32-NEXT:    kshiftrq $8, %k7, %k6
-; AVX512F-32-NEXT:    kxorq %k1, %k6, %k1
-; AVX512F-32-NEXT:    kmovd %edx, %k6
-; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; AVX512F-32-NEXT:    shrb $3, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $55, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $9, %k7, %k1
-; AVX512F-32-NEXT:    kxorq %k2, %k1, %k2
-; AVX512F-32-NEXT:    kmovd %ecx, %k1
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrb $4, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $54, %k2, %k2
-; AVX512F-32-NEXT:    kxorq %k2, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $10, %k7, %k2
-; AVX512F-32-NEXT:    kxorq %k3, %k2, %k3
-; AVX512F-32-NEXT:    kmovd %ecx, %k2
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrb $5, %cl
-; AVX512F-32-NEXT:    andb $1, %cl
-; AVX512F-32-NEXT:    movzwl %bp, %edx
-; AVX512F-32-NEXT:    movl %edx, %esi
-; AVX512F-32-NEXT:    movl %edx, %edi
-; AVX512F-32-NEXT:    shrl $12, %edx
-; AVX512F-32-NEXT:    kshiftlq $63, %k3, %k3
-; AVX512F-32-NEXT:    kshiftrq $53, %k3, %k3
-; AVX512F-32-NEXT:    kxorq %k3, %k7, %k3
-; AVX512F-32-NEXT:    kshiftrq $11, %k3, %k7
-; AVX512F-32-NEXT:    kxorq %k0, %k7, %k0
-; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $52, %k0, %k0
-; AVX512F-32-NEXT:    kxorq %k0, %k3, %k3
-; AVX512F-32-NEXT:    kshiftrq $12, %k3, %k0
-; AVX512F-32-NEXT:    kmovd %edx, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k0, %k7
-; AVX512F-32-NEXT:    kmovd %ecx, %k0
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrb $6, %cl
-; AVX512F-32-NEXT:    shrl $15, %esi
-; AVX512F-32-NEXT:    shrl $14, %edi
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $51, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k3, %k3
-; AVX512F-32-NEXT:    kshiftrq $13, %k3, %k7
-; AVX512F-32-NEXT:    kxorq %k4, %k7, %k4
-; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
-; AVX512F-32-NEXT:    kshiftrq $50, %k4, %k4
-; AVX512F-32-NEXT:    kxorq %k4, %k3, %k3
-; AVX512F-32-NEXT:    kshiftrq $14, %k3, %k4
-; AVX512F-32-NEXT:    kmovd %edi, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k4, %k4
-; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
-; AVX512F-32-NEXT:    kshiftrq $49, %k4, %k4
-; AVX512F-32-NEXT:    kxorq %k4, %k3, %k3
-; AVX512F-32-NEXT:    kshiftrq $15, %k3, %k4
-; AVX512F-32-NEXT:    kmovd %esi, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k4, %k4
-; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
-; AVX512F-32-NEXT:    kshiftrq $48, %k4, %k4
-; AVX512F-32-NEXT:    kxorq %k4, %k3, %k3
-; AVX512F-32-NEXT:    kshiftrq $16, %k3, %k4
-; AVX512F-32-NEXT:    kmovd %eax, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k4, %k4
-; AVX512F-32-NEXT:    kmovd %ecx, %k7
-; AVX512F-32-NEXT:    movl %ebp, %ecx
-; AVX512F-32-NEXT:    shrl $24, %ecx
-; AVX512F-32-NEXT:    # kill: def $al killed $al killed $eax def $eax
-; AVX512F-32-NEXT:    shrb $7, %al
-; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
-; AVX512F-32-NEXT:    kshiftrq $47, %k4, %k4
-; AVX512F-32-NEXT:    kxorq %k4, %k3, %k3
-; AVX512F-32-NEXT:    kshiftrq $17, %k3, %k4
-; AVX512F-32-NEXT:    kxorq %k5, %k4, %k4
-; AVX512F-32-NEXT:    kmovd %eax, %k5
-; AVX512F-32-NEXT:    movl %ecx, %eax
-; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
-; AVX512F-32-NEXT:    kshiftrq $46, %k4, %k4
-; AVX512F-32-NEXT:    kxorq %k4, %k3, %k4
-; AVX512F-32-NEXT:    kshiftrq $18, %k4, %k3
-; AVX512F-32-NEXT:    kxorq %k6, %k3, %k6
-; AVX512F-32-NEXT:    kmovd %ecx, %k3
-; AVX512F-32-NEXT:    # kill: def $cl killed $cl killed $ecx def $ecx
-; AVX512F-32-NEXT:    andb $15, %cl
-; AVX512F-32-NEXT:    andb $2, %al
-; AVX512F-32-NEXT:    shrb %al
-; AVX512F-32-NEXT:    kshiftlq $63, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $45, %k6, %k6
-; AVX512F-32-NEXT:    kxorq %k6, %k4, %k6
-; AVX512F-32-NEXT:    kshiftrq $19, %k6, %k4
-; AVX512F-32-NEXT:    kxorq %k1, %k4, %k1
-; AVX512F-32-NEXT:    kmovd %eax, %k4
-; AVX512F-32-NEXT:    movl %ecx, %edx
-; AVX512F-32-NEXT:    shrb $2, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $44, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k6, %k1
-; AVX512F-32-NEXT:    kshiftrq $20, %k1, %k6
-; AVX512F-32-NEXT:    kxorq %k2, %k6, %k6
-; AVX512F-32-NEXT:    kmovd %ecx, %k2
-; AVX512F-32-NEXT:    movl %ebx, %eax
-; AVX512F-32-NEXT:    andb $15, %al
-; AVX512F-32-NEXT:    shrb $3, %dl
-; AVX512F-32-NEXT:    kshiftlq $63, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $43, %k6, %k6
-; AVX512F-32-NEXT:    kxorq %k6, %k1, %k6
-; AVX512F-32-NEXT:    kshiftrq $21, %k6, %k1
-; AVX512F-32-NEXT:    kxorq %k0, %k1, %k0
-; AVX512F-32-NEXT:    kmovd %edx, %k1
-; AVX512F-32-NEXT:    movl %ebp, %ecx
-; AVX512F-32-NEXT:    shrl $29, %ecx
-; AVX512F-32-NEXT:    andb $1, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $42, %k0, %k0
-; AVX512F-32-NEXT:    kxorq %k0, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $22, %k6, %k0
-; AVX512F-32-NEXT:    kxorq %k7, %k0, %k7
-; AVX512F-32-NEXT:    kmovd %ecx, %k0
-; AVX512F-32-NEXT:    movl %ebx, %ecx
-; AVX512F-32-NEXT:    andb $2, %cl
-; AVX512F-32-NEXT:    shrb %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $41, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $23, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k5, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %ecx, %k5
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrb $2, %al
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $40, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k6, %k7
-; AVX512F-32-NEXT:    kshiftrq $24, %k7, %k6
-; AVX512F-32-NEXT:    kxorq %k3, %k6, %k3
-; AVX512F-32-NEXT:    kmovd %eax, %k6
-; AVX512F-32-NEXT:    movb %bh, %al
-; AVX512F-32-NEXT:    andb $15, %al
-; AVX512F-32-NEXT:    shrb $3, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k3, %k3
-; AVX512F-32-NEXT:    kshiftrq $39, %k3, %k3
-; AVX512F-32-NEXT:    kxorq %k3, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $25, %k7, %k3
-; AVX512F-32-NEXT:    kxorq %k4, %k3, %k4
-; AVX512F-32-NEXT:    kmovd %ecx, %k3
-; AVX512F-32-NEXT:    movl %ebx, %ecx
-; AVX512F-32-NEXT:    shrb $4, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
-; AVX512F-32-NEXT:    kshiftrq $38, %k4, %k4
-; AVX512F-32-NEXT:    kxorq %k4, %k7, %k4
-; AVX512F-32-NEXT:    kshiftrq $26, %k4, %k7
-; AVX512F-32-NEXT:    kxorq %k2, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %ecx, %k2
-; AVX512F-32-NEXT:    movl %ebx, %ecx
-; AVX512F-32-NEXT:    shrb $5, %cl
-; AVX512F-32-NEXT:    andb $1, %cl
-; AVX512F-32-NEXT:    movl %ebp, %edx
-; AVX512F-32-NEXT:    shrl $28, %edx
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $37, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k4, %k4
-; AVX512F-32-NEXT:    kshiftrq $27, %k4, %k7
-; AVX512F-32-NEXT:    kxorq %k1, %k7, %k1
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $36, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k4, %k1
-; AVX512F-32-NEXT:    kshiftrq $28, %k1, %k4
-; AVX512F-32-NEXT:    kmovd %edx, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k4, %k7
-; AVX512F-32-NEXT:    kmovd %ecx, %k4
-; AVX512F-32-NEXT:    movl %ebx, %ecx
-; AVX512F-32-NEXT:    shrb $6, %cl
-; AVX512F-32-NEXT:    movl %ebp, %edx
-; AVX512F-32-NEXT:    shrl $31, %edx
-; AVX512F-32-NEXT:    movl %ebp, %esi
-; AVX512F-32-NEXT:    shrl $30, %esi
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $35, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $29, %k1, %k7
-; AVX512F-32-NEXT:    kxorq %k0, %k7, %k0
-; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $34, %k0, %k0
-; AVX512F-32-NEXT:    kxorq %k0, %k1, %k0
-; AVX512F-32-NEXT:    kshiftrq $30, %k0, %k1
-; AVX512F-32-NEXT:    kmovd %esi, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k1, %k1
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $33, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $31, %k0, %k1
-; AVX512F-32-NEXT:    kmovd %edx, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k1, %k1
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $32, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $32, %k0, %k1
-; AVX512F-32-NEXT:    kmovd %ebx, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k1, %k1
-; AVX512F-32-NEXT:    kmovd %ecx, %k7
-; AVX512F-32-NEXT:    movl %ebx, %ecx
-; AVX512F-32-NEXT:    shrb $7, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $31, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $33, %k0, %k1
-; AVX512F-32-NEXT:    kxorq %k5, %k1, %k1
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $30, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $34, %k0, %k1
-; AVX512F-32-NEXT:    kxorq %k6, %k1, %k5
-; AVX512F-32-NEXT:    kmovd %ecx, %k6
-; AVX512F-32-NEXT:    movb %bh, %cl
-; AVX512F-32-NEXT:    kmovd %ecx, %k1
-; AVX512F-32-NEXT:    andb $2, %cl
-; AVX512F-32-NEXT:    shrb %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k5, %k5
-; AVX512F-32-NEXT:    kshiftrq $29, %k5, %k5
-; AVX512F-32-NEXT:    kxorq %k5, %k0, %k5
-; AVX512F-32-NEXT:    kshiftrq $35, %k5, %k0
-; AVX512F-32-NEXT:    kxorq %k3, %k0, %k3
-; AVX512F-32-NEXT:    kmovd %ecx, %k0
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrb $2, %al
-; AVX512F-32-NEXT:    kshiftlq $63, %k3, %k3
-; AVX512F-32-NEXT:    kshiftrq $28, %k3, %k3
-; AVX512F-32-NEXT:    kxorq %k3, %k5, %k5
-; AVX512F-32-NEXT:    kshiftrq $36, %k5, %k3
-; AVX512F-32-NEXT:    kxorq %k2, %k3, %k2
-; AVX512F-32-NEXT:    kmovd %eax, %k3
-; AVX512F-32-NEXT:    movl %ebx, %eax
-; AVX512F-32-NEXT:    shrl $16, %eax
-; AVX512F-32-NEXT:    shrb $3, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $27, %k2, %k2
-; AVX512F-32-NEXT:    kxorq %k2, %k5, %k2
-; AVX512F-32-NEXT:    kshiftrq $37, %k2, %k5
-; AVX512F-32-NEXT:    kxorq %k4, %k5, %k4
-; AVX512F-32-NEXT:    kmovd %ecx, %k5
-; AVX512F-32-NEXT:    movl %ebx, %ecx
-; AVX512F-32-NEXT:    shrl $13, %ecx
-; AVX512F-32-NEXT:    andb $1, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k4, %k4
-; AVX512F-32-NEXT:    kshiftrq $26, %k4, %k4
-; AVX512F-32-NEXT:    kxorq %k4, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $38, %k2, %k4
-; AVX512F-32-NEXT:    kxorq %k7, %k4, %k7
-; AVX512F-32-NEXT:    kmovd %ecx, %k4
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    andb $2, %cl
-; AVX512F-32-NEXT:    shrb %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $25, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k2, %k7
-; AVX512F-32-NEXT:    kshiftrq $39, %k7, %k2
-; AVX512F-32-NEXT:    kxorq %k6, %k2, %k6
-; AVX512F-32-NEXT:    kmovd %ecx, %k2
-; AVX512F-32-NEXT:    movl %eax, %edx
-; AVX512F-32-NEXT:    andb $15, %dl
-; AVX512F-32-NEXT:    movl %edx, %ecx
-; AVX512F-32-NEXT:    shrb $2, %dl
-; AVX512F-32-NEXT:    kshiftlq $63, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $24, %k6, %k6
-; AVX512F-32-NEXT:    kxorq %k6, %k7, %k6
-; AVX512F-32-NEXT:    kshiftrq $40, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k1, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %edx, %k1
-; AVX512F-32-NEXT:    movzwl %bx, %esi
-; AVX512F-32-NEXT:    movl %esi, %edx
-; AVX512F-32-NEXT:    movl %esi, %edi
-; AVX512F-32-NEXT:    shrl $12, %esi
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $23, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $41, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k0, %k7, %k0
-; AVX512F-32-NEXT:    kmovd %esi, %k7
-; AVX512F-32-NEXT:    shrl $14, %edi
-; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $22, %k0, %k0
-; AVX512F-32-NEXT:    kxorq %k0, %k6, %k0
-; AVX512F-32-NEXT:    kshiftrq $42, %k0, %k6
-; AVX512F-32-NEXT:    kxorq %k3, %k6, %k3
-; AVX512F-32-NEXT:    kmovd %edi, %k6
-; AVX512F-32-NEXT:    shrb $3, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k3, %k3
-; AVX512F-32-NEXT:    kshiftrq $21, %k3, %k3
-; AVX512F-32-NEXT:    kxorq %k3, %k0, %k3
-; AVX512F-32-NEXT:    kshiftrq $43, %k3, %k0
-; AVX512F-32-NEXT:    kxorq %k5, %k0, %k5
-; AVX512F-32-NEXT:    kmovd %ecx, %k0
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrb $4, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k5, %k5
-; AVX512F-32-NEXT:    kshiftrq $20, %k5, %k5
-; AVX512F-32-NEXT:    kxorq %k5, %k3, %k5
-; AVX512F-32-NEXT:    kshiftrq $44, %k5, %k3
-; AVX512F-32-NEXT:    kxorq %k7, %k3, %k7
-; AVX512F-32-NEXT:    kmovd %ecx, %k3
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrb $5, %cl
-; AVX512F-32-NEXT:    andb $1, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $19, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k5, %k5
-; AVX512F-32-NEXT:    kshiftrq $45, %k5, %k7
-; AVX512F-32-NEXT:    kxorq %k4, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %ecx, %k4
-; AVX512F-32-NEXT:    movl %eax, %ecx
-; AVX512F-32-NEXT:    shrb $6, %cl
-; AVX512F-32-NEXT:    shrl $15, %edx
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $18, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k5, %k5
-; AVX512F-32-NEXT:    kshiftrq $46, %k5, %k7
-; AVX512F-32-NEXT:    kxorq %k6, %k7, %k6
-; AVX512F-32-NEXT:    kshiftlq $63, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $17, %k6, %k6
-; AVX512F-32-NEXT:    kxorq %k6, %k5, %k5
-; AVX512F-32-NEXT:    kshiftrq $47, %k5, %k6
-; AVX512F-32-NEXT:    kmovd %edx, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k6, %k6
-; AVX512F-32-NEXT:    kshiftlq $63, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $16, %k6, %k6
-; AVX512F-32-NEXT:    kxorq %k6, %k5, %k6
-; AVX512F-32-NEXT:    kshiftrq $48, %k6, %k5
-; AVX512F-32-NEXT:    kmovd %eax, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k5, %k7
-; AVX512F-32-NEXT:    kmovd %ecx, %k5
-; AVX512F-32-NEXT:    movl %ebx, %ecx
-; AVX512F-32-NEXT:    shrl $24, %ecx
-; AVX512F-32-NEXT:    # kill: def $al killed $al killed $eax def $eax
-; AVX512F-32-NEXT:    shrb $7, %al
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $15, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $49, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k2, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %eax, %k2
-; AVX512F-32-NEXT:    movl %ecx, %eax
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $14, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $50, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k1, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %ecx, %k1
-; AVX512F-32-NEXT:    # kill: def $cl killed $cl killed $ecx def $ecx
-; AVX512F-32-NEXT:    andb $15, %cl
-; AVX512F-32-NEXT:    andb $2, %al
-; AVX512F-32-NEXT:    shrb %al
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $13, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $51, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k0, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %eax, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
-; AVX512F-32-NEXT:    shrb $2, %cl
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $12, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $52, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k3, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %ecx, %k3
-; AVX512F-32-NEXT:    shrb $3, %al
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $11, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $53, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k4, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %eax, %k4
-; AVX512F-32-NEXT:    movl %ebx, %eax
-; AVX512F-32-NEXT:    shrl $29, %eax
-; AVX512F-32-NEXT:    andb $1, %al
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $10, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $54, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k5, %k7, %k7
-; AVX512F-32-NEXT:    kmovd %eax, %k5
-; AVX512F-32-NEXT:    kshiftlq $63, %k7, %k7
-; AVX512F-32-NEXT:    kshiftrq $9, %k7, %k7
-; AVX512F-32-NEXT:    kxorq %k7, %k6, %k6
-; AVX512F-32-NEXT:    kshiftrq $55, %k6, %k7
-; AVX512F-32-NEXT:    kxorq %k2, %k7, %k2
-; AVX512F-32-NEXT:    kshiftlq $63, %k2, %k2
-; AVX512F-32-NEXT:    kshiftrq $8, %k2, %k2
-; AVX512F-32-NEXT:    kxorq %k2, %k6, %k2
-; AVX512F-32-NEXT:    kshiftrq $56, %k2, %k6
-; AVX512F-32-NEXT:    kxorq %k1, %k6, %k1
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $7, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k2, %k1
-; AVX512F-32-NEXT:    kshiftrq $57, %k1, %k2
-; AVX512F-32-NEXT:    kxorq %k0, %k2, %k0
-; AVX512F-32-NEXT:    kshiftlq $63, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $6, %k0, %k0
-; AVX512F-32-NEXT:    kxorq %k0, %k1, %k0
-; AVX512F-32-NEXT:    kshiftrq $58, %k0, %k1
-; AVX512F-32-NEXT:    kxorq %k3, %k1, %k1
-; AVX512F-32-NEXT:    movl %ebx, %eax
-; AVX512F-32-NEXT:    shrl $28, %eax
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $5, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $59, %k0, %k1
-; AVX512F-32-NEXT:    kxorq %k4, %k1, %k1
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $4, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $60, %k0, %k1
-; AVX512F-32-NEXT:    kmovd %eax, %k2
-; AVX512F-32-NEXT:    kxorq %k2, %k1, %k1
-; AVX512F-32-NEXT:    movl %ebx, %eax
-; AVX512F-32-NEXT:    shrl $31, %eax
-; AVX512F-32-NEXT:    movl %ebx, %ecx
-; AVX512F-32-NEXT:    shrl $30, %ecx
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $3, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $61, %k0, %k1
-; AVX512F-32-NEXT:    kxorq %k5, %k1, %k1
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $2, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $62, %k0, %k1
-; AVX512F-32-NEXT:    kmovd %ecx, %k2
-; AVX512F-32-NEXT:    kxorq %k2, %k1, %k1
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    kshiftrq $1, %k1, %k1
-; AVX512F-32-NEXT:    kxorq %k1, %k0, %k0
-; AVX512F-32-NEXT:    kshiftlq $1, %k0, %k0
-; AVX512F-32-NEXT:    kshiftrq $1, %k0, %k0
+; AVX512F-32-NEXT:    vpcmpeqb %zmm1, %zmm0, %k2
+; AVX512F-32-NEXT:    kmovd %esi, %k0
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
-; AVX512F-32-NEXT:    kshiftlq $63, %k1, %k1
-; AVX512F-32-NEXT:    korq %k1, %k0, %k1
-; AVX512F-32-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
-; AVX512F-32-NEXT:    kshiftrq $32, %k0, %k2
-; AVX512F-32-NEXT:    kmovd %k2, %eax
-; AVX512F-32-NEXT:    kmovd %k0, %ecx
-; AVX512F-32-NEXT:    vpcmpltub %zmm1, %zmm0, %k0 {%k1}
-; AVX512F-32-NEXT:    kshiftrq $32, %k0, %k2
-; AVX512F-32-NEXT:    kmovd %k0, %edx
-; AVX512F-32-NEXT:    addl %ecx, %edx
-; AVX512F-32-NEXT:    kmovd %k2, %ecx
-; AVX512F-32-NEXT:    adcl %eax, %ecx
-; AVX512F-32-NEXT:    vpcmpleub %zmm1, %zmm0, %k0 {%k1}
-; AVX512F-32-NEXT:    kshiftrq $32, %k0, %k2
-; AVX512F-32-NEXT:    kmovd %k0, %eax
-; AVX512F-32-NEXT:    addl %edx, %eax
+; AVX512F-32-NEXT:    kshiftrq $32, %k2, %k3
+; AVX512F-32-NEXT:    kandd %k1, %k3, %k3
+; AVX512F-32-NEXT:    kmovd %k3, %eax
+; AVX512F-32-NEXT:    kandd %k0, %k2, %k2
 ; AVX512F-32-NEXT:    kmovd %k2, %edx
-; AVX512F-32-NEXT:    adcl %ecx, %edx
-; AVX512F-32-NEXT:    vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
-; AVX512F-32-NEXT:    kshiftrq $32, %k0, %k2
-; AVX512F-32-NEXT:    kmovd %k0, %ecx
-; AVX512F-32-NEXT:    addl %eax, %ecx
-; AVX512F-32-NEXT:    kmovd %k2, %eax
-; AVX512F-32-NEXT:    adcl %edx, %eax
-; AVX512F-32-NEXT:    vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
-; AVX512F-32-NEXT:    kshiftrq $32, %k0, %k2
-; AVX512F-32-NEXT:    kmovd %k0, %edx
-; AVX512F-32-NEXT:    addl %ecx, %edx
+; AVX512F-32-NEXT:    vpcmpltub %zmm1, %zmm0, %k2
+; AVX512F-32-NEXT:    kshiftrq $32, %k2, %k3
+; AVX512F-32-NEXT:    kandd %k1, %k3, %k3
+; AVX512F-32-NEXT:    kmovd %k3, %edi
+; AVX512F-32-NEXT:    kandd %k0, %k2, %k2
+; AVX512F-32-NEXT:    kmovd %k2, %ebx
+; AVX512F-32-NEXT:    addl %edx, %ebx
+; AVX512F-32-NEXT:    adcl %eax, %edi
+; AVX512F-32-NEXT:    vpcmpleub %zmm1, %zmm0, %k2
+; AVX512F-32-NEXT:    kshiftrq $32, %k2, %k3
+; AVX512F-32-NEXT:    kandd %k1, %k3, %k3
+; AVX512F-32-NEXT:    kmovd %k3, %eax
+; AVX512F-32-NEXT:    kandd %k0, %k2, %k2
+; AVX512F-32-NEXT:    kmovd %k2, %edx
+; AVX512F-32-NEXT:    addl %ebx, %edx
+; AVX512F-32-NEXT:    adcl %edi, %eax
+; AVX512F-32-NEXT:    vpcmpneqb %zmm1, %zmm0, %k2
+; AVX512F-32-NEXT:    kshiftrq $32, %k2, %k3
+; AVX512F-32-NEXT:    kandd %k1, %k3, %k3
+; AVX512F-32-NEXT:    kmovd %k3, %edi
+; AVX512F-32-NEXT:    kandd %k0, %k2, %k2
+; AVX512F-32-NEXT:    kmovd %k2, %ebx
+; AVX512F-32-NEXT:    addl %edx, %ebx
+; AVX512F-32-NEXT:    adcl %eax, %edi
+; AVX512F-32-NEXT:    vpcmpnltub %zmm1, %zmm0, %k2
+; AVX512F-32-NEXT:    kshiftrq $32, %k2, %k3
+; AVX512F-32-NEXT:    kandd %k1, %k3, %k3
+; AVX512F-32-NEXT:    kmovd %k3, %ebp
+; AVX512F-32-NEXT:    kandd %k0, %k2, %k2
 ; AVX512F-32-NEXT:    kmovd %k2, %ecx
-; AVX512F-32-NEXT:    adcl %eax, %ecx
-; AVX512F-32-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0 {%k1}
-; AVX512F-32-NEXT:    kshiftrq $32, %k0, %k1
-; AVX512F-32-NEXT:    kmovd %k0, %eax
-; AVX512F-32-NEXT:    addl %edx, %eax
+; AVX512F-32-NEXT:    addl %ebx, %ecx
+; AVX512F-32-NEXT:    adcl %edi, %ebp
+; AVX512F-32-NEXT:    vpcmpnleub %zmm1, %zmm0, %k2
+; AVX512F-32-NEXT:    kshiftrq $32, %k2, %k3
+; AVX512F-32-NEXT:    kandd %k1, %k3, %k1
 ; AVX512F-32-NEXT:    kmovd %k1, %edx
-; AVX512F-32-NEXT:    adcl %ecx, %edx
-; AVX512F-32-NEXT:    addl %ebp, %eax
-; AVX512F-32-NEXT:    adcl %ebx, %edx
+; AVX512F-32-NEXT:    kandd %k0, %k2, %k0
+; AVX512F-32-NEXT:    kmovd %k0, %eax
+; AVX512F-32-NEXT:    addl %ecx, %eax
+; AVX512F-32-NEXT:    adcl %ebp, %edx
+; AVX512F-32-NEXT:    addl %esi, %eax
+; AVX512F-32-NEXT:    adcl {{[0-9]+}}(%esp), %edx
 ; AVX512F-32-NEXT:    popl %esi
 ; AVX512F-32-NEXT:    popl %edi
 ; AVX512F-32-NEXT:    popl %ebx




More information about the llvm-commits mailing list