[llvm] r320120 - [X86] Handle all versions of vXi1 insert_vector_elt with a constant index without falling back to shuffles.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 7 16:16:09 PST 2017
Author: ctopper
Date: Thu Dec 7 16:16:09 2017
New Revision: 320120
URL: http://llvm.org/viewvc/llvm-project?rev=320120&view=rev
Log:
[X86] Handle all versions of vXi1 insert_vector_elt with a constant index without falling back to shuffles.
We previously only supported inserting into the LSB or MSB, where it was easy to zero the surrounding bits and perform an OR to do the insert.
This change effectively extracts the old bit and the new bit, xors them together, and then xors that single bit back into the correct position of the original vector. The final xor cancels out the old value, leaving the new value in that position.
The way I've implemented this uses three shifts and two xors and needs an additional register. We could avoid the additional register at the cost of another shift.
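For illustration only (not part of the commit), here is a minimal scalar C++ sketch of the same shift/xor sequence, modeling an N-bit mask register with a uint64_t. The helper name and the explicit width masking are my own stand-ins for the zeroing behavior of the k-register shifts:

  #include <cassert>
  #include <cstdint>

  // Insert bit 'b' at position 'idx' of a 'bits'-wide mask 'v' using only
  // shifts and xors, mirroring the KSHIFTR/KXOR/KSHIFTL/KSHIFTR/KXOR chain.
  static uint64_t insert_mask_bit(uint64_t v, unsigned idx, unsigned b, unsigned bits) {
    uint64_t width_mask = bits == 64 ? ~0ULL : (1ULL << bits) - 1;
    uint64_t m = (v >> idx) ^ (b & 1);   // KSHIFTR + KXOR: bit 0 = old ^ new
    m = (m << (bits - 1)) & width_mask;  // KSHIFTL: keep only that bit, moved to the MSB
    m >>= bits - 1 - idx;                // KSHIFTR: the single bit is now at position idx
    return v ^ m;                        // KXOR: old ^ (old ^ new) = new; other bits unchanged
  }

  int main() {
    // v16i1-style example: bit 10 of 0xABCD is 0; setting it gives 0xAFCD.
    assert(insert_mask_bit(0xABCD, 10, 1, 16) == 0xAFCD);
    // Clearing bit 3 (currently 1) gives 0xABC5.
    assert(insert_mask_bit(0xABCD, 3, 0, 16) == 0xABC5);
    return 0;
  }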
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=320120&r1=320119&r2=320120&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Dec 7 16:16:09 2017
@@ -14699,21 +14699,14 @@ static SDValue InsertBitToMaskVector(SDV
// If the kshift instructions of the correct width aren't natively supported
// then we need to promote the vector to the native size to get the correct
// zeroing behavior.
- bool HasNativeShift = true;
if ((!Subtarget.hasDQI() && NumElems == 8) || (NumElems < 8)) {
- HasNativeShift = false;
- // For now don't do this if we are going to end up using the shuffle
- // below. This minimizes test diffs.
- // TODO: Remove this restriction once we no longer need a shuffle fallback.
- if (Vec.isUndef() || IdxVal == 0) {
- // Need to promote to v16i1, do the insert, then extract back.
- Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
- DAG.getUNDEF(MVT::v16i1), Vec,
- DAG.getIntPtrConstant(0, dl));
- Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i1, Vec, Elt, Idx);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VecVT, Op,
- DAG.getIntPtrConstant(0, dl));
- }
+ // Need to promote to v16i1, do the insert, then extract back.
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
+ DAG.getUNDEF(MVT::v16i1), Vec,
+ DAG.getIntPtrConstant(0, dl));
+ Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i1, Vec, Elt, Idx);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VecVT, Op,
+ DAG.getIntPtrConstant(0, dl));
}
SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt);
@@ -14741,7 +14734,7 @@ static SDValue InsertBitToMaskVector(SDV
return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
}
// Insertion of one bit into last position
- if (HasNativeShift && IdxVal == NumElems - 1) {
+ if (IdxVal == NumElems - 1) {
// Move the bit to the last position inside the vector.
EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
@@ -14754,12 +14747,20 @@ static SDValue InsertBitToMaskVector(SDV
return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
}
- // Use shuffle to insert element.
- SmallVector<int, 64> MaskVec(NumElems);
- for (unsigned i = 0; i != NumElems; ++i)
- MaskVec[i] = (i == IdxVal) ? NumElems : i;
-
- return DAG.getVectorShuffle(VecVT, dl, Vec, EltInVec, MaskVec);
+ // Move the current value of the bit to be replaced to bit 0.
+ SDValue Merged = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
+ DAG.getConstant(IdxVal, dl, MVT::i8));
+ // Xor with the new bit.
+ Merged = DAG.getNode(ISD::XOR, dl, VecVT, Merged, EltInVec);
+ // Shift to MSB, filling bottom bits with 0.
+ Merged = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Merged,
+ DAG.getConstant(NumElems - 1, dl, MVT::i8));
+ // Shift to the final position, filling upper bits with 0.
+ Merged = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Merged,
+ DAG.getConstant(NumElems - 1 - IdxVal, dl, MVT::i8));
+ // Xor with original vector to cancel out the original bit value that's still
+ // present.
+ return DAG.getNode(ISD::XOR, dl, VecVT, Merged, Vec);
}
SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
Modified: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll?rev=320120&r1=320119&r2=320120&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll Thu Dec 7 16:16:09 2017
@@ -309,31 +309,28 @@ define i16 @test16(i1 *%addr, i16 %a) {
; KNL-LABEL: test16:
; KNL: ## %bb.0:
; KNL-NEXT: movb (%rdi), %al
-; KNL-NEXT: kmovw %esi, %k1
-; KNL-NEXT: kmovw %eax, %k2
-; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
-; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; KNL-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,16,11,12,13,14,15]
-; KNL-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; KNL-NEXT: vpslld $31, %zmm2, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
+; KNL-NEXT: kmovw %esi, %k0
+; KNL-NEXT: kmovw %eax, %k1
+; KNL-NEXT: kshiftrw $10, %k0, %k2
+; KNL-NEXT: kxorw %k1, %k2, %k1
+; KNL-NEXT: kshiftlw $15, %k1, %k1
+; KNL-NEXT: kshiftrw $5, %k1, %k1
+; KNL-NEXT: kxorw %k0, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: def %ax killed %ax killed %eax
-; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test16:
; SKX: ## %bb.0:
; SKX-NEXT: kmovb (%rdi), %k0
; SKX-NEXT: kmovd %esi, %k1
-; SKX-NEXT: vpmovm2d %k0, %zmm0
-; SKX-NEXT: vpmovm2d %k1, %zmm1
-; SKX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,7,8,9,16,11,12,13,14,15]
-; SKX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; SKX-NEXT: vpmovd2m %zmm2, %k0
+; SKX-NEXT: kshiftrw $10, %k1, %k2
+; SKX-NEXT: kxorw %k0, %k2, %k0
+; SKX-NEXT: kshiftlw $15, %k0, %k0
+; SKX-NEXT: kshiftrw $5, %k0, %k0
+; SKX-NEXT: kxorw %k1, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def %ax killed %ax killed %eax
-; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
%x = load i1 , i1 * %addr, align 128
%a1 = bitcast i16 %a to <16 x i1>
@@ -346,31 +343,28 @@ define i8 @test17(i1 *%addr, i8 %a) {
; KNL-LABEL: test17:
; KNL: ## %bb.0:
; KNL-NEXT: movb (%rdi), %al
-; KNL-NEXT: kmovw %esi, %k1
-; KNL-NEXT: kmovw %eax, %k2
-; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
-; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,8,5,6,7]
-; KNL-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
-; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
+; KNL-NEXT: kmovw %esi, %k0
+; KNL-NEXT: kmovw %eax, %k1
+; KNL-NEXT: kshiftrw $4, %k0, %k2
+; KNL-NEXT: kxorw %k1, %k2, %k1
+; KNL-NEXT: kshiftlw $15, %k1, %k1
+; KNL-NEXT: kshiftrw $11, %k1, %k1
+; KNL-NEXT: kxorw %k0, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: def %al killed %al killed %eax
-; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test17:
; SKX: ## %bb.0:
; SKX-NEXT: kmovb (%rdi), %k0
; SKX-NEXT: kmovd %esi, %k1
-; SKX-NEXT: vpmovm2q %k0, %zmm0
-; SKX-NEXT: vpmovm2q %k1, %zmm1
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,8,5,6,7]
-; SKX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; SKX-NEXT: vpmovq2m %zmm2, %k0
+; SKX-NEXT: kshiftrb $4, %k1, %k2
+; SKX-NEXT: kxorb %k0, %k2, %k0
+; SKX-NEXT: kshiftlb $7, %k0, %k0
+; SKX-NEXT: kshiftrb $3, %k0, %k0
+; SKX-NEXT: kxorb %k1, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def %al killed %al killed %eax
-; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
%x = load i1 , i1 * %addr, align 128
%a1 = bitcast i8 %a to <8 x i1>
@@ -962,12 +956,12 @@ define i32 @test_insertelement_v32i1(i32
; SKX-NEXT: vpcmpltud %zmm2, %zmm0, %k0
; SKX-NEXT: vpcmpltud %zmm3, %zmm1, %k1
; SKX-NEXT: kunpckwd %k0, %k1, %k0
-; SKX-NEXT: vpmovm2w %k0, %zmm0
-; SKX-NEXT: kmovd %eax, %k0
-; SKX-NEXT: vpmovm2w %k0, %zmm1
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,32,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
-; SKX-NEXT: vpermi2w %zmm1, %zmm0, %zmm2
-; SKX-NEXT: vpmovw2m %zmm2, %k0
+; SKX-NEXT: kshiftrd $4, %k0, %k1
+; SKX-NEXT: kmovd %eax, %k2
+; SKX-NEXT: kxord %k2, %k1, %k1
+; SKX-NEXT: kshiftld $31, %k1, %k1
+; SKX-NEXT: kshiftrd $27, %k1, %k1
+; SKX-NEXT: kxord %k0, %k1, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
@@ -988,37 +982,33 @@ define i8 @test_iinsertelement_v4i1(i32
; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1
; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; KNL-NEXT: vpextrb $4, %xmm0, %ecx
-; KNL-NEXT: kmovw %ecx, %k1
-; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; KNL-NEXT: kmovw %ecx, %k0
; KNL-NEXT: vpextrb $0, %xmm0, %ecx
; KNL-NEXT: andl $1, %ecx
-; KNL-NEXT: kmovw %ecx, %k0
-; KNL-NEXT: kshiftrw $1, %k0, %k1
-; KNL-NEXT: kshiftlw $1, %k1, %k1
-; KNL-NEXT: korw %k0, %k1, %k1
-; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; KNL-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; KNL-NEXT: vpsllq $63, %zmm3, %zmm1
-; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
-; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; KNL-NEXT: kmovw %eax, %k1
-; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; KNL-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; KNL-NEXT: vpsllq $63, %zmm3, %zmm1
-; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
-; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; KNL-NEXT: kmovw %ecx, %k1
+; KNL-NEXT: kshiftrw $1, %k0, %k2
+; KNL-NEXT: kshiftlw $1, %k2, %k2
+; KNL-NEXT: korw %k1, %k2, %k1
+; KNL-NEXT: kshiftrw $1, %k1, %k2
+; KNL-NEXT: kxorw %k0, %k2, %k0
+; KNL-NEXT: kshiftlw $15, %k0, %k0
+; KNL-NEXT: kshiftrw $14, %k0, %k0
+; KNL-NEXT: kxorw %k1, %k0, %k0
+; KNL-NEXT: kshiftrw $2, %k0, %k1
+; KNL-NEXT: kmovw %eax, %k2
+; KNL-NEXT: kxorw %k2, %k1, %k1
+; KNL-NEXT: kshiftlw $15, %k1, %k1
+; KNL-NEXT: kshiftrw $13, %k1, %k1
+; KNL-NEXT: kxorw %k0, %k1, %k0
+; KNL-NEXT: kshiftrw $3, %k0, %k1
; KNL-NEXT: vpextrb $12, %xmm0, %eax
-; KNL-NEXT: kmovw %eax, %k1
-; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; KNL-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
-; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
+; KNL-NEXT: kmovw %eax, %k2
+; KNL-NEXT: kxorw %k2, %k1, %k1
+; KNL-NEXT: kshiftlw $15, %k1, %k1
+; KNL-NEXT: kshiftrw $12, %k1, %k1
+; KNL-NEXT: kxorw %k0, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: def %al killed %al killed %eax
-; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_iinsertelement_v4i1:
@@ -1026,12 +1016,12 @@ define i8 @test_iinsertelement_v4i1(i32
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: setb %al
; SKX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
-; SKX-NEXT: vpmovm2d %k0, %xmm0
-; SKX-NEXT: kmovd %eax, %k0
-; SKX-NEXT: vpmovm2d %k0, %xmm1
-; SKX-NEXT: vpbroadcastq %xmm1, %xmm1
-; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
-; SKX-NEXT: vpmovd2m %xmm0, %k0
+; SKX-NEXT: kshiftrw $2, %k0, %k1
+; SKX-NEXT: kmovd %eax, %k2
+; SKX-NEXT: kxorw %k2, %k1, %k1
+; SKX-NEXT: kshiftlw $15, %k1, %k1
+; SKX-NEXT: kshiftrw $13, %k1, %k1
+; SKX-NEXT: kxorw %k0, %k1, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def %al killed %al killed %eax
; SKX-NEXT: retq
@@ -1057,17 +1047,15 @@ define i8 @test_iinsertelement_v2i1(i32
; KNL-NEXT: kmovw %ecx, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
-; KNL-NEXT: korw %k0, %k1, %k1
-; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT: kmovw %eax, %k1
-; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; KNL-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
-; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
+; KNL-NEXT: korw %k0, %k1, %k0
+; KNL-NEXT: kshiftrw $1, %k0, %k1
+; KNL-NEXT: kmovw %eax, %k2
+; KNL-NEXT: kxorw %k2, %k1, %k1
+; KNL-NEXT: kshiftlw $15, %k1, %k1
+; KNL-NEXT: kshiftrw $14, %k1, %k1
+; KNL-NEXT: kxorw %k0, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: def %al killed %al killed %eax
-; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: test_iinsertelement_v2i1:
@@ -1075,11 +1063,12 @@ define i8 @test_iinsertelement_v2i1(i32
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: setb %al
; SKX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
-; SKX-NEXT: vpmovm2q %k0, %xmm0
-; SKX-NEXT: kmovd %eax, %k0
-; SKX-NEXT: vpmovm2q %k0, %xmm1
-; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SKX-NEXT: vpmovq2m %xmm0, %k0
+; SKX-NEXT: kshiftrw $1, %k0, %k1
+; SKX-NEXT: kmovd %eax, %k2
+; SKX-NEXT: kxorw %k2, %k1, %k1
+; SKX-NEXT: kshiftlw $15, %k1, %k1
+; SKX-NEXT: kshiftrw $14, %k1, %k1
+; SKX-NEXT: kxorw %k0, %k1, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def %al killed %al killed %eax
; SKX-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=320120&r1=320119&r2=320120&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Thu Dec 7 16:16:09 2017
@@ -972,14 +972,11 @@ define <64 x i8> @test16(i64 %x) {
; SKX-NEXT: kmovq %rdi, %k0
; SKX-NEXT: movb $1, %al
; SKX-NEXT: kmovd %eax, %k1
-; SKX-NEXT: vpmovm2b %k1, %zmm0
-; SKX-NEXT: vpsllq $40, %xmm0, %xmm0
-; SKX-NEXT: vpmovm2b %k0, %zmm1
-; SKX-NEXT: movl $32, %eax
-; SKX-NEXT: kmovd %eax, %k1
-; SKX-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1}
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; SKX-NEXT: vpmovb2m %zmm0, %k0
+; SKX-NEXT: kshiftrq $5, %k0, %k2
+; SKX-NEXT: kxorq %k1, %k2, %k1
+; SKX-NEXT: kshiftlq $63, %k1, %k1
+; SKX-NEXT: kshiftrq $58, %k1, %k1
+; SKX-NEXT: kxorq %k0, %k1, %k0
; SKX-NEXT: vpmovm2b %k0, %zmm0
; SKX-NEXT: retq
;
@@ -988,13 +985,11 @@ define <64 x i8> @test16(i64 %x) {
; AVX512BW-NEXT: kmovq %rdi, %k0
; AVX512BW-NEXT: movb $1, %al
; AVX512BW-NEXT: kmovd %eax, %k1
-; AVX512BW-NEXT: vpmovm2b %k1, %zmm0
-; AVX512BW-NEXT: vpsllq $40, %xmm0, %xmm0
-; AVX512BW-NEXT: vpmovm2b %k0, %zmm1
-; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512BW-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
+; AVX512BW-NEXT: kshiftrq $5, %k0, %k2
+; AVX512BW-NEXT: kxorq %k1, %k2, %k1
+; AVX512BW-NEXT: kshiftlq $63, %k1, %k1
+; AVX512BW-NEXT: kshiftrq $58, %k1, %k1
+; AVX512BW-NEXT: kxorq %k0, %k1, %k0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
; AVX512BW-NEXT: retq
;
@@ -1085,14 +1080,11 @@ define <64 x i8> @test17(i64 %x, i32 %y,
; SKX-NEXT: cmpl %edx, %esi
; SKX-NEXT: setg %al
; SKX-NEXT: kmovd %eax, %k1
-; SKX-NEXT: vpmovm2b %k1, %zmm0
-; SKX-NEXT: vpsllq $40, %xmm0, %xmm0
-; SKX-NEXT: vpmovm2b %k0, %zmm1
-; SKX-NEXT: movl $32, %eax
-; SKX-NEXT: kmovd %eax, %k1
-; SKX-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1}
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; SKX-NEXT: vpmovb2m %zmm0, %k0
+; SKX-NEXT: kshiftrq $5, %k0, %k2
+; SKX-NEXT: kxorq %k1, %k2, %k1
+; SKX-NEXT: kshiftlq $63, %k1, %k1
+; SKX-NEXT: kshiftrq $58, %k1, %k1
+; SKX-NEXT: kxorq %k0, %k1, %k0
; SKX-NEXT: vpmovm2b %k0, %zmm0
; SKX-NEXT: retq
;
@@ -1102,13 +1094,11 @@ define <64 x i8> @test17(i64 %x, i32 %y,
; AVX512BW-NEXT: cmpl %edx, %esi
; AVX512BW-NEXT: setg %al
; AVX512BW-NEXT: kmovd %eax, %k1
-; AVX512BW-NEXT: vpmovm2b %k1, %zmm0
-; AVX512BW-NEXT: vpsllq $40, %xmm0, %xmm0
-; AVX512BW-NEXT: vpmovm2b %k0, %zmm1
-; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512BW-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
+; AVX512BW-NEXT: kshiftrq $5, %k0, %k2
+; AVX512BW-NEXT: kxorq %k1, %k2, %k1
+; AVX512BW-NEXT: kshiftlq $63, %k1, %k1
+; AVX512BW-NEXT: kshiftrq $58, %k1, %k1
+; AVX512BW-NEXT: kxorq %k0, %k1, %k0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
; AVX512BW-NEXT: retq
;
@@ -1159,24 +1149,22 @@ define <64 x i8> @test17(i64 %x, i32 %y,
define <8 x i1> @test18(i8 %a, i16 %y) {
; KNL-LABEL: test18:
; KNL: ## %bb.0:
-; KNL-NEXT: kmovw %edi, %k2
-; KNL-NEXT: kmovw %esi, %k0
-; KNL-NEXT: kshiftlw $7, %k0, %k1
+; KNL-NEXT: kmovw %edi, %k0
+; KNL-NEXT: kmovw %esi, %k1
+; KNL-NEXT: kshiftlw $7, %k1, %k2
+; KNL-NEXT: kshiftrw $15, %k2, %k2
+; KNL-NEXT: kshiftlw $6, %k1, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kshiftlw $6, %k0, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k3
-; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
-; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k3} {z}
-; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7]
-; KNL-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
-; KNL-NEXT: vptestmq %zmm0, %zmm0, %k2
-; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
-; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; KNL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,8]
-; KNL-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; KNL-NEXT: vpsllq $63, %zmm2, %zmm0
-; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1
+; KNL-NEXT: kshiftrw $6, %k0, %k3
+; KNL-NEXT: kxorw %k1, %k3, %k1
+; KNL-NEXT: kshiftlw $15, %k1, %k1
+; KNL-NEXT: kshiftrw $9, %k1, %k1
+; KNL-NEXT: kxorw %k0, %k1, %k0
+; KNL-NEXT: kshiftrw $7, %k0, %k1
+; KNL-NEXT: kxorw %k2, %k1, %k1
+; KNL-NEXT: kshiftlw $15, %k1, %k1
+; KNL-NEXT: kshiftrw $8, %k1, %k1
+; KNL-NEXT: kxorw %k0, %k1, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %ymm0
@@ -1185,45 +1173,42 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
;
; SKX-LABEL: test18:
; SKX: ## %bb.0:
-; SKX-NEXT: kmovd %edi, %k1
-; SKX-NEXT: kmovd %esi, %k2
-; SKX-NEXT: kshiftlw $7, %k2, %k0
-; SKX-NEXT: kshiftrw $15, %k0, %k0
-; SKX-NEXT: kshiftlw $6, %k2, %k2
+; SKX-NEXT: kmovd %edi, %k0
+; SKX-NEXT: kmovd %esi, %k1
+; SKX-NEXT: kshiftlw $7, %k1, %k2
; SKX-NEXT: kshiftrw $15, %k2, %k2
-; SKX-NEXT: vpmovm2q %k1, %zmm0
-; SKX-NEXT: vpmovm2q %k2, %zmm1
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7]
-; SKX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; SKX-NEXT: vpmovq2m %zmm2, %k1
-; SKX-NEXT: kshiftlb $1, %k1, %k1
+; SKX-NEXT: kshiftlw $6, %k1, %k1
+; SKX-NEXT: kshiftrw $15, %k1, %k1
+; SKX-NEXT: kshiftrb $6, %k0, %k3
+; SKX-NEXT: kxorb %k1, %k3, %k1
+; SKX-NEXT: kshiftlb $7, %k1, %k1
; SKX-NEXT: kshiftrb $1, %k1, %k1
-; SKX-NEXT: kshiftlb $7, %k0, %k0
-; SKX-NEXT: korb %k0, %k1, %k0
+; SKX-NEXT: kxorb %k0, %k1, %k0
+; SKX-NEXT: kshiftlb $1, %k0, %k0
+; SKX-NEXT: kshiftrb $1, %k0, %k0
+; SKX-NEXT: kshiftlb $7, %k2, %k1
+; SKX-NEXT: korb %k1, %k0, %k0
; SKX-NEXT: vpmovm2w %k0, %xmm0
-; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; AVX512BW-LABEL: test18:
; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: kmovd %edi, %k2
-; AVX512BW-NEXT: kmovd %esi, %k0
-; AVX512BW-NEXT: kshiftlw $7, %k0, %k1
+; AVX512BW-NEXT: kmovd %edi, %k0
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: kshiftlw $7, %k1, %k2
+; AVX512BW-NEXT: kshiftrw $15, %k2, %k2
+; AVX512BW-NEXT: kshiftlw $6, %k1, %k1
; AVX512BW-NEXT: kshiftrw $15, %k1, %k1
-; AVX512BW-NEXT: kshiftlw $6, %k0, %k0
-; AVX512BW-NEXT: kshiftrw $15, %k0, %k3
-; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
-; AVX512BW-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k3} {z}
-; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7]
-; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; AVX512BW-NEXT: vpsllq $63, %zmm2, %zmm0
-; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k2
-; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
-; AVX512BW-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,6,8]
-; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; AVX512BW-NEXT: vpsllq $63, %zmm2, %zmm0
-; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
+; AVX512BW-NEXT: kshiftrw $6, %k0, %k3
+; AVX512BW-NEXT: kxorw %k1, %k3, %k1
+; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
+; AVX512BW-NEXT: kshiftrw $9, %k1, %k1
+; AVX512BW-NEXT: kxorw %k0, %k1, %k0
+; AVX512BW-NEXT: kshiftrw $7, %k0, %k1
+; AVX512BW-NEXT: kxorw %k2, %k1, %k1
+; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
+; AVX512BW-NEXT: kshiftrw $8, %k1, %k1
+; AVX512BW-NEXT: kxorw %k0, %k1, %k0
; AVX512BW-NEXT: vpmovm2w %k0, %zmm0
; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0
; AVX512BW-NEXT: vzeroupper
@@ -1231,21 +1216,21 @@ define <8 x i1> @test18(i8 %a, i16 %y) {
;
; AVX512DQ-LABEL: test18:
; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: kmovw %edi, %k1
-; AVX512DQ-NEXT: kmovw %esi, %k2
-; AVX512DQ-NEXT: kshiftlw $7, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kshiftlw $6, %k2, %k2
+; AVX512DQ-NEXT: kmovw %edi, %k0
+; AVX512DQ-NEXT: kmovw %esi, %k1
+; AVX512DQ-NEXT: kshiftlw $7, %k1, %k2
; AVX512DQ-NEXT: kshiftrw $15, %k2, %k2
-; AVX512DQ-NEXT: vpmovm2q %k1, %zmm0
-; AVX512DQ-NEXT: vpmovm2q %k2, %zmm1
-; AVX512DQ-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7]
-; AVX512DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; AVX512DQ-NEXT: vpmovq2m %zmm2, %k1
-; AVX512DQ-NEXT: kshiftlb $1, %k1, %k1
+; AVX512DQ-NEXT: kshiftlw $6, %k1, %k1
+; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
+; AVX512DQ-NEXT: kshiftrb $6, %k0, %k3
+; AVX512DQ-NEXT: kxorb %k1, %k3, %k1
+; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1
; AVX512DQ-NEXT: kshiftrb $1, %k1, %k1
-; AVX512DQ-NEXT: kshiftlb $7, %k0, %k0
-; AVX512DQ-NEXT: korb %k0, %k1, %k0
+; AVX512DQ-NEXT: kxorb %k0, %k1, %k0
+; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0
+; AVX512DQ-NEXT: kshiftrb $1, %k0, %k0
+; AVX512DQ-NEXT: kshiftlb $7, %k2, %k1
+; AVX512DQ-NEXT: korb %k1, %k0, %k0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 killed %ymm0
Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=320120&r1=320119&r2=320120&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Thu Dec 7 16:16:09 2017
@@ -7325,14 +7325,11 @@ define <64 x i8> @vmov_test16(i64 %x) {
; GENERIC-NEXT: kmovq %rdi, %k0 # sched: [1:0.33]
; GENERIC-NEXT: movb $1, %al # sched: [1:0.33]
; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovm2b %k1, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: vpsllq $40, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovm2b %k0, %zmm1 # sched: [1:0.33]
-; GENERIC-NEXT: movl $32, %eax # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [2:1.00]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: kshiftrq $5, %k0, %k2 # sched: [1:1.00]
+; GENERIC-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftlq $63, %k1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftrq $58, %k1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT: kxorq %k0, %k1, %k0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -7341,14 +7338,11 @@ define <64 x i8> @vmov_test16(i64 %x) {
; SKX-NEXT: kmovq %rdi, %k0 # sched: [1:1.00]
; SKX-NEXT: movb $1, %al # sched: [1:0.25]
; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2b %k1, %zmm0 # sched: [1:0.25]
-; SKX-NEXT: vpsllq $40, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovm2b %k0, %zmm1 # sched: [1:0.25]
-; SKX-NEXT: movl $32, %eax # sched: [1:0.25]
-; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33]
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [3:1.00]
-; SKX-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:1.00]
+; SKX-NEXT: kshiftrq $5, %k0, %k2 # sched: [3:1.00]
+; SKX-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00]
+; SKX-NEXT: kshiftlq $63, %k1, %k1 # sched: [3:1.00]
+; SKX-NEXT: kshiftrq $58, %k1, %k1 # sched: [3:1.00]
+; SKX-NEXT: kxorq %k0, %k1, %k0 # sched: [1:1.00]
; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%a = bitcast i64 %x to <64 x i1>
@@ -7365,14 +7359,11 @@ define <64 x i8> @vmov_test17(i64 %x, i3
; GENERIC-NEXT: cmpl %edx, %esi # sched: [1:0.33]
; GENERIC-NEXT: setg %al # sched: [1:0.50]
; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovm2b %k1, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: vpsllq $40, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovm2b %k0, %zmm1 # sched: [1:0.33]
-; GENERIC-NEXT: movl $32, %eax # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [2:1.00]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: kshiftrq $5, %k0, %k2 # sched: [1:1.00]
+; GENERIC-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftlq $63, %k1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftrq $58, %k1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT: kxorq %k0, %k1, %k0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -7382,14 +7373,11 @@ define <64 x i8> @vmov_test17(i64 %x, i3
; SKX-NEXT: cmpl %edx, %esi # sched: [1:0.25]
; SKX-NEXT: setg %al # sched: [1:0.50]
; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2b %k1, %zmm0 # sched: [1:0.25]
-; SKX-NEXT: vpsllq $40, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovm2b %k0, %zmm1 # sched: [1:0.25]
-; SKX-NEXT: movl $32, %eax # sched: [1:0.25]
-; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33]
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [3:1.00]
-; SKX-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:1.00]
+; SKX-NEXT: kshiftrq $5, %k0, %k2 # sched: [3:1.00]
+; SKX-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00]
+; SKX-NEXT: kshiftlq $63, %k1, %k1 # sched: [3:1.00]
+; SKX-NEXT: kshiftrq $58, %k1, %k1 # sched: [3:1.00]
+; SKX-NEXT: kxorq %k0, %k1, %k0 # sched: [1:1.00]
; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%a = bitcast i64 %x to <64 x i1>
@@ -7402,44 +7390,42 @@ define <64 x i8> @vmov_test17(i64 %x, i3
define <8 x i1> @vmov_test18(i8 %a, i16 %y) {
; GENERIC-LABEL: vmov_test18:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %esi, %k2 # sched: [1:0.33]
-; GENERIC-NEXT: kshiftlw $7, %k2, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: kshiftrw $15, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: kshiftlw $6, %k2, %k2 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT: kshiftlw $7, %k1, %k2 # sched: [1:1.00]
; GENERIC-NEXT: kshiftrw $15, %k2, %k2 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovm2q %k1, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovm2q %k2, %zmm1 # sched: [1:0.33]
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7] sched: [4:0.50]
-; GENERIC-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovq2m %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: kshiftlb $1, %k1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftlw $6, %k1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftrw $15, %k1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftrb $6, %k0, %k3 # sched: [1:1.00]
+; GENERIC-NEXT: kxorb %k1, %k3, %k1 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftlb $7, %k1, %k1 # sched: [1:1.00]
; GENERIC-NEXT: kshiftrb $1, %k1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: kshiftlb $7, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: korb %k0, %k1, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kxorb %k0, %k1, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftlb $1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftrb $1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftlb $7, %k2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vmov_test18:
; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: kmovd %esi, %k2 # sched: [1:1.00]
-; SKX-NEXT: kshiftlw $7, %k2, %k0 # sched: [3:1.00]
-; SKX-NEXT: kshiftrw $15, %k0, %k0 # sched: [3:1.00]
-; SKX-NEXT: kshiftlw $6, %k2, %k2 # sched: [3:1.00]
+; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
+; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00]
+; SKX-NEXT: kshiftlw $7, %k1, %k2 # sched: [3:1.00]
; SKX-NEXT: kshiftrw $15, %k2, %k2 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2q %k1, %zmm0 # sched: [1:0.25]
-; SKX-NEXT: vpmovm2q %k2, %zmm1 # sched: [1:0.25]
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7] sched: [8:0.50]
-; SKX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 # sched: [3:1.00]
-; SKX-NEXT: vpmovq2m %zmm2, %k1 # sched: [1:1.00]
-; SKX-NEXT: kshiftlb $1, %k1, %k1 # sched: [3:1.00]
+; SKX-NEXT: kshiftlw $6, %k1, %k1 # sched: [3:1.00]
+; SKX-NEXT: kshiftrw $15, %k1, %k1 # sched: [3:1.00]
+; SKX-NEXT: kshiftrb $6, %k0, %k3 # sched: [3:1.00]
+; SKX-NEXT: kxorb %k1, %k3, %k1 # sched: [1:1.00]
+; SKX-NEXT: kshiftlb $7, %k1, %k1 # sched: [3:1.00]
; SKX-NEXT: kshiftrb $1, %k1, %k1 # sched: [3:1.00]
-; SKX-NEXT: kshiftlb $7, %k0, %k0 # sched: [3:1.00]
-; SKX-NEXT: korb %k0, %k1, %k0 # sched: [1:1.00]
+; SKX-NEXT: kxorb %k0, %k1, %k0 # sched: [1:1.00]
+; SKX-NEXT: kshiftlb $1, %k0, %k0 # sched: [3:1.00]
+; SKX-NEXT: kshiftrb $1, %k0, %k0 # sched: [3:1.00]
+; SKX-NEXT: kshiftlb $7, %k2, %k1 # sched: [3:1.00]
+; SKX-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00]
; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%b = bitcast i8 %a to <8 x i1>
%b1 = bitcast i16 %y to <16 x i1>
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll?rev=320120&r1=320119&r2=320120&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll Thu Dec 7 16:16:09 2017
@@ -120,713 +120,537 @@ define <8 x i64> @test_mm512_mask_set1_e
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebx
; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: pushl %esi
+; X32-NEXT: .cfi_def_cfa_offset 12
+; X32-NEXT: .cfi_offset %esi, -12
; X32-NEXT: .cfi_offset %ebx, -8
-; X32-NEXT: vmovdqa64 %zmm0, %zmm3
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: andb $2, %cl
-; X32-NEXT: shrb %cl
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vpsllw $8, %xmm1, %xmm1
-; X32-NEXT: kmovd %eax, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: andb $15, %cl
-; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: kshiftrq $1, %k0, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: andb $2, %al
+; X32-NEXT: shrb %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $62, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $2, %k0, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: andb $15, %al
+; X32-NEXT: movl %eax, %edx
; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vpbroadcastw %xmm2, %xmm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm2
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm2[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vpslld $24, %xmm2, %xmm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm2
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm2[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrb $4, %cl
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vpbroadcastd %xmm2, %xmm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm2
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm2[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrb $5, %cl
-; X32-NEXT: andb $1, %cl
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vpsllq $40, %xmm2, %xmm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm2
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm2[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrb $6, %cl
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vpbroadcastw %xmm2, %xmm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm2
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm2[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrb $7, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm1
-; X32-NEXT: vpsllq $56, %xmm1, %xmm1
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: movb %ah, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm1
-; X32-NEXT: vpbroadcastq %xmm1, %xmm1
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: andb $2, %cl
-; X32-NEXT: shrb %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm1
-; X32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6]
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: movb %ah, %cl
-; X32-NEXT: andb $15, %cl
-; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $61, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $3, %k0, %k1
+; X32-NEXT: shrb $3, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $60, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $4, %k0, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrb $4, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $59, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $5, %k0, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrb $5, %al
+; X32-NEXT: andb $1, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $58, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $6, %k0, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrb $6, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $57, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $7, %k0, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrb $7, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $56, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $8, %k0, %k1
+; X32-NEXT: movb %ch, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $55, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $9, %k0, %k1
+; X32-NEXT: andb $2, %al
+; X32-NEXT: shrb %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $54, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $10, %k0, %k1
+; X32-NEXT: movb %ch, %al
+; X32-NEXT: andb $15, %al
+; X32-NEXT: movl %eax, %edx
; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm1
-; X32-NEXT: vpbroadcastw %xmm1, %xmm1
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm1
-; X32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4]
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $12, %ecx
-; X32-NEXT: andl $15, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm1
-; X32-NEXT: vpbroadcastd %xmm1, %xmm1
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $13, %ecx
-; X32-NEXT: andb $1, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm1
-; X32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2]
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $14, %ecx
-; X32-NEXT: andl $3, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm1
-; X32-NEXT: vpbroadcastw %xmm1, %xmm1
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $15, %ecx
-; X32-NEXT: andl $1, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm1
-; X32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $16, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm1
-; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: andb $2, %dl
-; X32-NEXT: shrb %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm1
-; X32-NEXT: vpsllw $8, %xmm1, %xmm1
-; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $53, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $11, %k0, %k1
+; X32-NEXT: shrb $3, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrl $12, %eax
+; X32-NEXT: andl $15, %eax
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrl $13, %eax
+; X32-NEXT: andb $1, %al
+; X32-NEXT: kmovd %eax, %k3
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrl $14, %eax
+; X32-NEXT: andl $3, %eax
+; X32-NEXT: kmovd %eax, %k4
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrl $15, %eax
+; X32-NEXT: andl $1, %eax
+; X32-NEXT: kmovd %eax, %k5
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrl $16, %edx
+; X32-NEXT: movl %edx, %eax
+; X32-NEXT: andb $2, %al
+; X32-NEXT: shrb %al
+; X32-NEXT: kmovd %eax, %k6
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: andb $15, %bl
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: shrb $2, %al
+; X32-NEXT: kmovd %eax, %k7
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $52, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $12, %k0, %k1
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $51, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $13, %k0, %k1
+; X32-NEXT: kxorq %k3, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $50, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $14, %k0, %k1
+; X32-NEXT: kxorq %k4, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $49, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $15, %k0, %k1
+; X32-NEXT: kxorq %k5, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $48, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $16, %k0, %k1
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $47, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $17, %k0, %k1
+; X32-NEXT: kxorq %k6, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $46, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $18, %k0, %k1
+; X32-NEXT: kxorq %k7, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $45, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $19, %k0, %k1
+; X32-NEXT: shrb $3, %bl
+; X32-NEXT: kmovd %ebx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $44, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $20, %k0, %k1
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: shrb $4, %bl
+; X32-NEXT: kmovd %ebx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $43, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $21, %k0, %k1
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: shrb $5, %bl
+; X32-NEXT: andb $1, %bl
+; X32-NEXT: kmovd %ebx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $42, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $22, %k0, %k1
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: shrb $6, %bl
+; X32-NEXT: kmovd %ebx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $41, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $23, %k0, %k1
+; X32-NEXT: shrb $7, %dl
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $40, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $24, %k0, %k1
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrl $24, %edx
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $39, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $25, %k0, %k1
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: andb $2, %bl
+; X32-NEXT: shrb %bl
+; X32-NEXT: kmovd %ebx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $38, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $26, %k0, %k1
; X32-NEXT: andb $15, %dl
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: shrb $2, %bl
-; X32-NEXT: kmovd %ebx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm1
-; X32-NEXT: vpbroadcastw %xmm1, %xmm1
-; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
+; X32-NEXT: kmovd %ebx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $37, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $27, %k0, %k1
; X32-NEXT: shrb $3, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm1
-; X32-NEXT: vpslld $24, %xmm1, %xmm1
-; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: shrb $4, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm1
-; X32-NEXT: vpbroadcastd %xmm1, %xmm1
-; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $36, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $28, %k0, %k1
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrl $28, %edx
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $35, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $29, %k0, %k1
; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: shrb $5, %dl
+; X32-NEXT: shrl $29, %edx
; X32-NEXT: andb $1, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm1
-; X32-NEXT: vpsllq $40, %xmm1, %xmm1
-; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: shrb $6, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm1
-; X32-NEXT: vpbroadcastw %xmm1, %xmm1
-; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: shrb $7, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm1
-; X32-NEXT: vpsllq $56, %xmm1, %xmm1
-; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $24, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm1
-; X32-NEXT: vpbroadcastq %xmm1, %ymm1
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm2, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: andb $2, %dl
-; X32-NEXT: shrb %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm1
-; X32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6]
-; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm5, %ymm2, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: andb $15, %cl
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm1
-; X32-NEXT: vpbroadcastw %xmm1, %xmm1
-; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; X32-NEXT: vpmovm2b %k0, %zmm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm4, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm4[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm1
-; X32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4]
-; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; X32-NEXT: vpmovm2b %k0, %zmm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm4, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm4[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $28, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm1
-; X32-NEXT: vpbroadcastd %xmm1, %xmm1
-; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; X32-NEXT: vpmovm2b %k0, %zmm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255]
-; X32-NEXT: vpblendvb %ymm0, %ymm4, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm4[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm1, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $29, %ecx
-; X32-NEXT: andb $1, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm1
-; X32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2]
-; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255]
-; X32-NEXT: vpblendvb %ymm4, %ymm0, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $30, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
-; X32-NEXT: vpblendvb %ymm4, %ymm0, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: shrl $31, %eax
-; X32-NEXT: kmovd %eax, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
-; X32-NEXT: vpblendvb %ymm7, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: kmovd %eax, %k1
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; X32-NEXT: vpmovm2b %k1, %zmm7
-; X32-NEXT: vmovdqa {{.*#+}} ymm6 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm6, %ymm1, %ymm7, %ymm1
-; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $34, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $30, %k0, %k1
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrl $30, %edx
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $33, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $31, %k0, %k1
+; X32-NEXT: shrl $31, %ecx
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $32, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $32, %k0, %k1
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $31, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $33, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: andb $2, %cl
; X32-NEXT: shrb %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllw $8, %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $30, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $34, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: andb $15, %cl
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $29, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $35, %k0, %k1
; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslld $24, %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $28, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $36, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrb $4, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastd %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $27, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $37, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrb $5, %cl
; X32-NEXT: andb $1, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllq $40, %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $26, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $38, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrb $6, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $25, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $39, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrb $7, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllq $56, %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $24, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $40, %k0, %k1
; X32-NEXT: movb %ah, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastq %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: andb $2, %cl
; X32-NEXT: shrb %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
; X32-NEXT: movb %ah, %cl
; X32-NEXT: andb $15, %cl
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k3
; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $12, %ecx
-; X32-NEXT: andl $15, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastd %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k4
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $13, %ecx
; X32-NEXT: andb $1, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $14, %ecx
-; X32-NEXT: andl $3, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $15, %ecx
-; X32-NEXT: andl $1, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k5
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $16, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: andb $2, %dl
; X32-NEXT: shrb %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllw $8, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k6
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: andb $15, %dl
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: shrb $2, %bl
-; X32-NEXT: kmovd %ebx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ebx, %k7
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $23, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $41, %k0, %k1
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $22, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $42, %k0, %k1
+; X32-NEXT: kxorq %k3, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $21, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $43, %k0, %k1
+; X32-NEXT: kxorq %k4, %k1, %k1
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: shrl $12, %esi
+; X32-NEXT: andl $15, %esi
+; X32-NEXT: kmovd %esi, %k2
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: shrl $14, %esi
+; X32-NEXT: andl $3, %esi
+; X32-NEXT: kmovd %esi, %k3
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: shrl $15, %esi
+; X32-NEXT: andl $1, %esi
+; X32-NEXT: kmovd %esi, %k4
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $20, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $44, %k0, %k1
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $19, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $45, %k0, %k1
+; X32-NEXT: kxorq %k5, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $18, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $46, %k0, %k1
+; X32-NEXT: kxorq %k3, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $17, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $47, %k0, %k1
+; X32-NEXT: kxorq %k4, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $16, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $48, %k0, %k1
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $15, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $49, %k0, %k1
+; X32-NEXT: kxorq %k6, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $14, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $50, %k0, %k1
+; X32-NEXT: kxorq %k7, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $13, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $51, %k0, %k1
; X32-NEXT: shrb $3, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslld $24, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $12, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k4
+; X32-NEXT: kshiftrq $52, %k4, %k0
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $4, %dl
; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastd %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kxorq %k1, %k0, %k5
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $5, %dl
; X32-NEXT: andb $1, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllq $40, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k6
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $6, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k7
; X32-NEXT: shrb $7, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllq $56, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k0
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $24, %ecx
; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastq %xmm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: andb $2, %dl
; X32-NEXT: shrb %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vpblendvb %ymm5, %ymm6, %ymm0, %ymm0
+; X32-NEXT: kmovd %edx, %k2
; X32-NEXT: andb $15, %cl
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vpbroadcastw %xmm1, %xmm1
-; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm5
-; X32-NEXT: vpblendvb %ymm2, %ymm5, %ymm1, %ymm2
+; X32-NEXT: kmovd %edx, %k3
+; X32-NEXT: kshiftlq $63, %k5, %k5
+; X32-NEXT: kshiftrq $11, %k5, %k5
+; X32-NEXT: kxorq %k4, %k5, %k4
+; X32-NEXT: kshiftrq $53, %k4, %k5
+; X32-NEXT: kxorq %k6, %k5, %k5
+; X32-NEXT: kshiftlq $63, %k5, %k5
+; X32-NEXT: kshiftrq $10, %k5, %k5
+; X32-NEXT: kxorq %k4, %k5, %k5
+; X32-NEXT: kshiftrq $54, %k5, %k4
+; X32-NEXT: kxorq %k7, %k4, %k6
; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k0
+; X32-NEXT: kmovd %ecx, %k4
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $29, %ecx
; X32-NEXT: andb $1, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k2
-; X32-NEXT: vpmovm2b %k2, %zmm0
-; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4]
-; X32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm5, %ymm1, %ymm2, %ymm1
-; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; X32-NEXT: kmovd %ecx, %k7
+; X32-NEXT: kshiftlq $63, %k6, %k6
+; X32-NEXT: kshiftrq $9, %k6, %k6
+; X32-NEXT: kxorq %k5, %k6, %k5
+; X32-NEXT: kshiftrq $55, %k5, %k6
+; X32-NEXT: kxorq %k0, %k6, %k0
+; X32-NEXT: kshiftlq $63, %k0, %k0
+; X32-NEXT: kshiftrq $8, %k0, %k0
+; X32-NEXT: kxorq %k5, %k0, %k0
+; X32-NEXT: kshiftrq $56, %k0, %k5
+; X32-NEXT: kxorq %k1, %k5, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $28, %ecx
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vpbroadcastd %xmm2, %xmm2
-; X32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255]
-; X32-NEXT: vpblendvb %ymm5, %ymm1, %ymm2, %ymm1
-; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; X32-NEXT: vpmovm2b %k1, %zmm2
-; X32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2]
-; X32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255]
-; X32-NEXT: vpblendvb %ymm5, %ymm1, %ymm2, %ymm1
+; X32-NEXT: kmovd %ecx, %k5
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $30, %ecx
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vpbroadcastw %xmm2, %xmm2
-; X32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; X32-NEXT: vpblendvb %ymm4, %ymm1, %ymm2, %ymm1
-; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k6
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $7, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $57, %k0, %k1
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $6, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $58, %k0, %k1
+; X32-NEXT: kxorq %k3, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $5, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $59, %k0, %k1
+; X32-NEXT: kxorq %k4, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $4, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $60, %k0, %k1
+; X32-NEXT: kxorq %k5, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $3, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $61, %k0, %k1
+; X32-NEXT: kxorq %k7, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $2, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $62, %k0, %k1
+; X32-NEXT: kxorq %k6, %k1, %k1
; X32-NEXT: shrl $31, %eax
-; X32-NEXT: kmovd %eax, %k1
+; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $1, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftlq $1, %k0, %k0
; X32-NEXT: kshiftrq $1, %k0, %k0
-; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k2, %k1
; X32-NEXT: korq %k1, %k0, %k1
-; X32-NEXT: vpbroadcastb %eax, %zmm3 {%k1}
-; X32-NEXT: vmovdqa64 %zmm3, %zmm0
+; X32-NEXT: vpbroadcastb %eax, %zmm0 {%k1}
+; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
;
@@ -850,710 +674,537 @@ define <8 x i64> @test_mm512_maskz_set1_
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebx
; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: pushl %esi
+; X32-NEXT: .cfi_def_cfa_offset 12
+; X32-NEXT: .cfi_offset %esi, -12
; X32-NEXT: .cfi_offset %ebx, -8
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: andb $2, %cl
-; X32-NEXT: shrb %cl
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vpsllw $8, %xmm0, %xmm0
-; X32-NEXT: kmovd %eax, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: andb $15, %cl
-; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: kshiftrq $1, %k0, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: andb $2, %al
+; X32-NEXT: shrb %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $62, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $2, %k0, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: andb $15, %al
+; X32-NEXT: movl %eax, %edx
; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vpbroadcastw %xmm1, %xmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vpslld $24, %xmm1, %xmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrb $4, %cl
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vpbroadcastd %xmm1, %xmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrb $5, %cl
-; X32-NEXT: andb $1, %cl
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vpsllq $40, %xmm1, %xmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrb $6, %cl
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vpbroadcastw %xmm1, %xmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrb $7, %cl
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vpsllq $56, %xmm1, %xmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movb %ah, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastq %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: andb $2, %cl
-; X32-NEXT: shrb %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movb %ah, %cl
-; X32-NEXT: andb $15, %cl
-; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $61, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $3, %k0, %k1
+; X32-NEXT: shrb $3, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $60, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $4, %k0, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrb $4, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $59, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $5, %k0, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrb $5, %al
+; X32-NEXT: andb $1, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $58, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $6, %k0, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrb $6, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $57, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $7, %k0, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrb $7, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $56, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $8, %k0, %k1
+; X32-NEXT: movb %ch, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $55, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $9, %k0, %k1
+; X32-NEXT: andb $2, %al
+; X32-NEXT: shrb %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $54, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $10, %k0, %k1
+; X32-NEXT: movb %ch, %al
+; X32-NEXT: andb $15, %al
+; X32-NEXT: movl %eax, %edx
; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $12, %ecx
-; X32-NEXT: andl $15, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastd %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $13, %ecx
-; X32-NEXT: andb $1, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $14, %ecx
-; X32-NEXT: andl $3, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $15, %ecx
-; X32-NEXT: andl $1, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $16, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: andb $2, %dl
-; X32-NEXT: shrb %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllw $8, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: andb $15, %dl
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $53, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $11, %k0, %k1
+; X32-NEXT: shrb $3, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrl $12, %eax
+; X32-NEXT: andl $15, %eax
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrl $13, %eax
+; X32-NEXT: andb $1, %al
+; X32-NEXT: kmovd %eax, %k3
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrl $14, %eax
+; X32-NEXT: andl $3, %eax
+; X32-NEXT: kmovd %eax, %k4
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrl $15, %eax
+; X32-NEXT: andl $1, %eax
+; X32-NEXT: kmovd %eax, %k5
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrl $16, %edx
+; X32-NEXT: movl %edx, %eax
+; X32-NEXT: andb $2, %al
+; X32-NEXT: shrb %al
+; X32-NEXT: kmovd %eax, %k6
; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: shrb $2, %bl
-; X32-NEXT: kmovd %ebx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: shrb $3, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslld $24, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: shrb $4, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastd %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: shrb $5, %dl
-; X32-NEXT: andb $1, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllq $40, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: shrb $6, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: shrb $7, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllq $56, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm6 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm6, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $24, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastq %xmm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: andb $2, %dl
-; X32-NEXT: shrb %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm5, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm4[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: andb $15, %cl
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm4[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4]
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm4[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $28, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastd %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255]
-; X32-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm4[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $29, %ecx
-; X32-NEXT: andb $1, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255]
-; X32-NEXT: vpblendvb %ymm3, %ymm0, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $30, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
-; X32-NEXT: vpblendvb %ymm3, %ymm0, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: shrl $31, %eax
-; X32-NEXT: kmovd %eax, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
-; X32-NEXT: vpblendvb %ymm7, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: andb $15, %bl
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: shrb $2, %al
+; X32-NEXT: kmovd %eax, %k7
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: kmovd %eax, %k1
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; X32-NEXT: vpmovm2b %k1, %zmm7
-; X32-NEXT: vmovdqa {{.*#+}} ymm4 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm4, %ymm1, %ymm7, %ymm1
-; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $52, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $12, %k0, %k1
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $51, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $13, %k0, %k1
+; X32-NEXT: kxorq %k3, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $50, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $14, %k0, %k1
+; X32-NEXT: kxorq %k4, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $49, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $15, %k0, %k1
+; X32-NEXT: kxorq %k5, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $48, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $16, %k0, %k1
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $47, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $17, %k0, %k1
+; X32-NEXT: kxorq %k6, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $46, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $18, %k0, %k1
+; X32-NEXT: kxorq %k7, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $45, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $19, %k0, %k1
+; X32-NEXT: shrb $3, %bl
+; X32-NEXT: kmovd %ebx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $44, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $20, %k0, %k1
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: shrb $4, %bl
+; X32-NEXT: kmovd %ebx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $43, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $21, %k0, %k1
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: shrb $5, %bl
+; X32-NEXT: andb $1, %bl
+; X32-NEXT: kmovd %ebx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $42, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $22, %k0, %k1
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: shrb $6, %bl
+; X32-NEXT: kmovd %ebx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $41, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $23, %k0, %k1
+; X32-NEXT: shrb $7, %dl
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $40, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $24, %k0, %k1
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrl $24, %edx
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $39, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $25, %k0, %k1
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: andb $2, %bl
+; X32-NEXT: shrb %bl
+; X32-NEXT: kmovd %ebx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $38, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $26, %k0, %k1
+; X32-NEXT: andb $15, %dl
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: shrb $2, %bl
+; X32-NEXT: kmovd %ebx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $37, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $27, %k0, %k1
+; X32-NEXT: shrb $3, %dl
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $36, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $28, %k0, %k1
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrl $28, %edx
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $35, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $29, %k0, %k1
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrl $29, %edx
+; X32-NEXT: andb $1, %dl
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $34, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $30, %k0, %k1
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrl $30, %edx
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $33, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $31, %k0, %k1
+; X32-NEXT: shrl $31, %ecx
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $32, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $32, %k0, %k1
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $31, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $33, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: andb $2, %cl
; X32-NEXT: shrb %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllw $8, %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $30, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $34, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: andb $15, %cl
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $29, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $35, %k0, %k1
; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslld $24, %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $28, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $36, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrb $4, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastd %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $27, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $37, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrb $5, %cl
; X32-NEXT: andb $1, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllq $40, %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $26, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $38, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrb $6, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $25, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $39, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrb $7, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllq $56, %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $24, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $40, %k0, %k1
; X32-NEXT: movb %ah, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastq %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: andb $2, %cl
; X32-NEXT: shrb %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
; X32-NEXT: movb %ah, %cl
; X32-NEXT: andb $15, %cl
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k3
; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $12, %ecx
-; X32-NEXT: andl $15, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastd %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k4
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $13, %ecx
; X32-NEXT: andb $1, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $14, %ecx
-; X32-NEXT: andl $3, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $15, %ecx
-; X32-NEXT: andl $1, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k5
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $16, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: andb $2, %dl
; X32-NEXT: shrb %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllw $8, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k6
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: andb $15, %dl
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: shrb $2, %bl
-; X32-NEXT: kmovd %ebx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ebx, %k7
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $23, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $41, %k0, %k1
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $22, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $42, %k0, %k1
+; X32-NEXT: kxorq %k3, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $21, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $43, %k0, %k1
+; X32-NEXT: kxorq %k4, %k1, %k1
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: shrl $12, %esi
+; X32-NEXT: andl $15, %esi
+; X32-NEXT: kmovd %esi, %k2
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: shrl $14, %esi
+; X32-NEXT: andl $3, %esi
+; X32-NEXT: kmovd %esi, %k3
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: shrl $15, %esi
+; X32-NEXT: andl $1, %esi
+; X32-NEXT: kmovd %esi, %k4
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $20, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $44, %k0, %k1
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $19, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $45, %k0, %k1
+; X32-NEXT: kxorq %k5, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $18, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $46, %k0, %k1
+; X32-NEXT: kxorq %k3, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $17, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $47, %k0, %k1
+; X32-NEXT: kxorq %k4, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $16, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $48, %k0, %k1
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $15, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $49, %k0, %k1
+; X32-NEXT: kxorq %k6, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $14, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $50, %k0, %k1
+; X32-NEXT: kxorq %k7, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $13, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $51, %k0, %k1
; X32-NEXT: shrb $3, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslld $24, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $12, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k4
+; X32-NEXT: kshiftrq $52, %k4, %k0
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $4, %dl
; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastd %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kxorq %k1, %k0, %k5
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $5, %dl
; X32-NEXT: andb $1, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllq $40, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k6
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $6, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k7
; X32-NEXT: shrb $7, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllq $56, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; X32-NEXT: vpblendvb %ymm6, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k0
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $24, %ecx
; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastq %xmm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; X32-NEXT: vpblendvb %ymm2, %ymm4, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm2
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: andb $2, %dl
; X32-NEXT: shrb %dl
-; X32-NEXT: kmovd %edx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vextracti64x4 $1, %zmm2, %ymm1
-; X32-NEXT: vpblendvb %ymm5, %ymm1, %ymm0, %ymm1
+; X32-NEXT: kmovd %edx, %k2
; X32-NEXT: andb $15, %cl
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k0
+; X32-NEXT: kmovd %edx, %k3
+; X32-NEXT: kshiftlq $63, %k5, %k5
+; X32-NEXT: kshiftrq $11, %k5, %k5
+; X32-NEXT: kxorq %k4, %k5, %k4
+; X32-NEXT: kshiftrq $53, %k4, %k5
+; X32-NEXT: kxorq %k6, %k5, %k5
+; X32-NEXT: kshiftlq $63, %k5, %k5
+; X32-NEXT: kshiftrq $10, %k5, %k5
+; X32-NEXT: kxorq %k4, %k5, %k5
+; X32-NEXT: kshiftrq $54, %k5, %k4
+; X32-NEXT: kxorq %k7, %k4, %k6
; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: kmovd %ecx, %k4
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $29, %ecx
; X32-NEXT: andb $1, %cl
-; X32-NEXT: kmovd %ecx, %k2
-; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k3
-; X32-NEXT: vpmovm2b %k3, %zmm0
-; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vpbroadcastw %xmm2, %xmm2
-; X32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm4, %ymm1, %ymm2, %ymm1
-; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; X32-NEXT: vpmovm2b %k1, %zmm2
-; X32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4]
-; X32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm4, %ymm1, %ymm2, %ymm1
-; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; X32-NEXT: kmovd %ecx, %k7
+; X32-NEXT: kshiftlq $63, %k6, %k6
+; X32-NEXT: kshiftrq $9, %k6, %k6
+; X32-NEXT: kxorq %k5, %k6, %k5
+; X32-NEXT: kshiftrq $55, %k5, %k6
+; X32-NEXT: kxorq %k0, %k6, %k0
+; X32-NEXT: kshiftlq $63, %k0, %k0
+; X32-NEXT: kshiftrq $8, %k0, %k0
+; X32-NEXT: kxorq %k5, %k0, %k0
+; X32-NEXT: kshiftrq $56, %k0, %k5
+; X32-NEXT: kxorq %k1, %k5, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $28, %ecx
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vpbroadcastd %xmm2, %xmm2
-; X32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255]
-; X32-NEXT: vpblendvb %ymm4, %ymm1, %ymm2, %ymm1
-; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; X32-NEXT: vpmovm2b %k2, %zmm2
-; X32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2]
-; X32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; X32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255]
-; X32-NEXT: vpblendvb %ymm4, %ymm1, %ymm2, %ymm1
+; X32-NEXT: kmovd %ecx, %k5
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $30, %ecx
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; X32-NEXT: vpmovm2b %k0, %zmm2
-; X32-NEXT: vpbroadcastw %xmm2, %xmm2
-; X32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; X32-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
-; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; X32-NEXT: kmovd %ecx, %k6
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $7, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $57, %k0, %k1
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $6, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $58, %k0, %k1
+; X32-NEXT: kxorq %k3, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $5, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $59, %k0, %k1
+; X32-NEXT: kxorq %k4, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $4, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $60, %k0, %k1
+; X32-NEXT: kxorq %k5, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $3, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $61, %k0, %k1
+; X32-NEXT: kxorq %k7, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $2, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $62, %k0, %k1
+; X32-NEXT: kxorq %k6, %k1, %k1
; X32-NEXT: shrl $31, %eax
-; X32-NEXT: kmovd %eax, %k0
+; X32-NEXT: kmovd %eax, %k2
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
-; X32-NEXT: vpmovb2m %zmm0, %k1
-; X32-NEXT: kshiftlq $1, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: kshiftrq $1, %k1, %k1
-; X32-NEXT: kshiftlq $63, %k0, %k0
-; X32-NEXT: korq %k0, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftlq $1, %k0, %k0
+; X32-NEXT: kshiftrq $1, %k0, %k0
+; X32-NEXT: kshiftlq $63, %k2, %k1
+; X32-NEXT: korq %k1, %k0, %k1
; X32-NEXT: vpbroadcastb %eax, %zmm0 {%k1} {z}
+; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: retl
;
@@ -2057,719 +1708,541 @@ define i64 @test_mm512_mask_test_epi8_ma
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
-; X32-NEXT: andl $-64, %esp
-; X32-NEXT: subl $256, %esp # imm = 0x100
+; X32-NEXT: pushl %esi
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $8, %esp
+; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
-; X32-NEXT: vmovaps %zmm1, {{[0-9]+}}(%esp) # 64-byte Spill
-; X32-NEXT: vmovaps %zmm0, {{[0-9]+}}(%esp) # 64-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: andb $2, %cl
-; X32-NEXT: shrb %cl
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vpsllw $8, %xmm0, %xmm0
-; X32-NEXT: kmovd %eax, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: andb $15, %cl
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vpbroadcastw %xmm1, %xmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vpslld $24, %xmm1, %xmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrb $4, %cl
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vpbroadcastd %xmm1, %xmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrb $5, %cl
-; X32-NEXT: andb $1, %cl
+; X32-NEXT: movl 8(%ebp), %ecx
; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vpsllq $40, %xmm1, %xmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrb $6, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrb $7, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllq $56, %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movb %ah, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastq %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: andb $2, %cl
-; X32-NEXT: shrb %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movb %ah, %cl
-; X32-NEXT: andb $15, %cl
-; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: kshiftrq $1, %k0, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: andb $2, %al
+; X32-NEXT: shrb %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $62, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $2, %k0, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: andb $15, %al
+; X32-NEXT: movl %eax, %edx
; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $12, %ecx
-; X32-NEXT: andl $15, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastd %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $13, %ecx
-; X32-NEXT: andb $1, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $14, %ecx
-; X32-NEXT: andl $3, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $15, %ecx
-; X32-NEXT: andl $1, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $16, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: andb $2, %dl
-; X32-NEXT: shrb %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllw $8, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $61, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $3, %k0, %k1
+; X32-NEXT: shrb $3, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $60, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $4, %k0, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrb $4, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $59, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $5, %k0, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrb $5, %al
+; X32-NEXT: andb $1, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $58, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $6, %k0, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrb $6, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $57, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $7, %k0, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrb $7, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $56, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $8, %k0, %k1
+; X32-NEXT: movb %ch, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: andb $2, %al
+; X32-NEXT: shrb %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: movb %ch, %al
+; X32-NEXT: andb $15, %al
+; X32-NEXT: movl %eax, %edx
+; X32-NEXT: shrb $2, %dl
+; X32-NEXT: kmovd %edx, %k3
+; X32-NEXT: shrb $3, %al
+; X32-NEXT: kmovd %eax, %k4
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrl $13, %eax
+; X32-NEXT: andb $1, %al
+; X32-NEXT: kmovd %eax, %k5
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrl $16, %edx
+; X32-NEXT: movl %edx, %eax
+; X32-NEXT: andb $2, %al
+; X32-NEXT: shrb %al
+; X32-NEXT: kmovd %eax, %k6
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: andb $15, %bl
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: shrb $2, %al
+; X32-NEXT: kmovd %eax, %k7
+; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $55, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $9, %k0, %k1
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $54, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $10, %k0, %k1
+; X32-NEXT: kxorq %k3, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $53, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $11, %k0, %k1
+; X32-NEXT: kxorq %k4, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $52, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $12, %k0, %k1
+; X32-NEXT: movl %ecx, %esi
+; X32-NEXT: shrl $12, %esi
+; X32-NEXT: andl $15, %esi
+; X32-NEXT: kmovd %esi, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $51, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $13, %k0, %k1
+; X32-NEXT: kxorq %k5, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $50, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $14, %k0, %k1
+; X32-NEXT: movl %ecx, %esi
+; X32-NEXT: shrl $14, %esi
+; X32-NEXT: andl $3, %esi
+; X32-NEXT: kmovd %esi, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $49, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $15, %k0, %k1
+; X32-NEXT: movl %ecx, %esi
+; X32-NEXT: shrl $15, %esi
+; X32-NEXT: andl $1, %esi
+; X32-NEXT: kmovd %esi, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $48, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $16, %k0, %k1
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $47, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $17, %k0, %k1
+; X32-NEXT: kxorq %k6, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $46, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $18, %k0, %k1
+; X32-NEXT: kxorq %k7, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $45, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $19, %k0, %k1
+; X32-NEXT: shrb $3, %bl
+; X32-NEXT: kmovd %ebx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $44, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $20, %k0, %k1
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: shrb $4, %bl
+; X32-NEXT: kmovd %ebx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $43, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $21, %k0, %k1
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: shrb $5, %bl
+; X32-NEXT: andb $1, %bl
+; X32-NEXT: kmovd %ebx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $42, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $22, %k0, %k1
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: shrb $6, %bl
+; X32-NEXT: kmovd %ebx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $41, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $23, %k0, %k1
+; X32-NEXT: shrb $7, %dl
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $40, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $24, %k0, %k1
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrl $24, %edx
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $39, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $25, %k0, %k1
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: andb $2, %bl
+; X32-NEXT: shrb %bl
+; X32-NEXT: kmovd %ebx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $38, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $26, %k0, %k1
; X32-NEXT: andb $15, %dl
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: shrb $2, %bl
-; X32-NEXT: kmovd %ebx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ebx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $37, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $27, %k0, %k1
; X32-NEXT: shrb $3, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslld $24, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: shrb $4, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastd %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $36, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $28, %k0, %k1
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrl $28, %edx
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $35, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $29, %k0, %k1
; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: shrb $5, %dl
+; X32-NEXT: shrl $29, %edx
; X32-NEXT: andb $1, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllq $40, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: shrb $6, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: shrb $7, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllq $56, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $24, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastq %xmm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: andb $2, %dl
-; X32-NEXT: shrb %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: andb $15, %cl
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4]
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm5, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $28, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastd %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255]
-; X32-NEXT: vpblendvb %ymm4, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $29, %ecx
-; X32-NEXT: andb $1, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255]
-; X32-NEXT: vpblendvb %ymm3, %ymm0, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $30, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: shrl $31, %eax
-; X32-NEXT: kmovd %eax, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
-; X32-NEXT: vpblendvb %ymm7, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl 12(%ebp), %eax
-; X32-NEXT: kmovd %eax, %k1
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; X32-NEXT: vpmovm2b %k1, %zmm7
-; X32-NEXT: vmovdqa {{.*#+}} ymm6 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm6, %ymm1, %ymm7, %ymm1
-; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $34, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $30, %k0, %k1
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrl $30, %edx
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $33, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $31, %k0, %k1
+; X32-NEXT: shrl $31, %ecx
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $32, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $32, %k0, %k1
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $31, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $33, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: andb $2, %cl
; X32-NEXT: shrb %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllw $8, %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $30, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $34, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: andb $15, %cl
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $29, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $35, %k0, %k1
; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslld $24, %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $28, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $36, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrb $4, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastd %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $27, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $37, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrb $5, %cl
; X32-NEXT: andb $1, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllq $40, %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $26, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $38, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrb $6, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $25, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $39, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrb $7, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllq $56, %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $24, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $40, %k0, %k1
; X32-NEXT: movb %ah, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastq %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: andb $2, %cl
; X32-NEXT: shrb %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
; X32-NEXT: movb %ah, %cl
; X32-NEXT: andb $15, %cl
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k3
; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $12, %ecx
-; X32-NEXT: andl $15, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastd %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $13, %ecx
-; X32-NEXT: andb $1, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $14, %ecx
-; X32-NEXT: andl $3, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k4
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $15, %ecx
-; X32-NEXT: andl $1, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: shrl $13, %ecx
+; X32-NEXT: andb $1, %cl
+; X32-NEXT: kmovd %ecx, %k5
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $16, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: andb $2, %dl
; X32-NEXT: shrb %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllw $8, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k6
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: andb $15, %dl
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: shrb $2, %bl
-; X32-NEXT: kmovd %ebx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ebx, %k7
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $23, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $41, %k0, %k1
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $22, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $42, %k0, %k1
+; X32-NEXT: kxorq %k3, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $21, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $43, %k0, %k1
+; X32-NEXT: kxorq %k4, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $20, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $44, %k0, %k1
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: shrl $12, %esi
+; X32-NEXT: andl $15, %esi
+; X32-NEXT: kmovd %esi, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $19, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $45, %k0, %k1
+; X32-NEXT: kxorq %k5, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $18, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $46, %k0, %k1
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: shrl $14, %esi
+; X32-NEXT: andl $3, %esi
+; X32-NEXT: kmovd %esi, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $17, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $47, %k0, %k1
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: shrl $15, %esi
+; X32-NEXT: andl $1, %esi
+; X32-NEXT: kmovd %esi, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $16, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $48, %k0, %k1
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $15, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $49, %k0, %k1
+; X32-NEXT: kxorq %k6, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $14, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $50, %k0, %k1
+; X32-NEXT: kxorq %k7, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $13, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $51, %k0, %k1
; X32-NEXT: shrb $3, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslld $24, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $12, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $52, %k0, %k1
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $4, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastd %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $5, %dl
; X32-NEXT: andb $1, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllq $40, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k2
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $6, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k3
; X32-NEXT: shrb $7, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllq $56, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k4
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $24, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastq %xmm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k5
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: andb $2, %dl
; X32-NEXT: shrb %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: kmovd %edx, %k6
; X32-NEXT: andb $15, %cl
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k0
-; X32-NEXT: vpmovb2m %zmm0, %k1
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k1, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: kmovd %edx, %k7
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $11, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $53, %k0, %k1
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $10, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $54, %k0, %k1
+; X32-NEXT: kxorq %k3, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $9, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $55, %k0, %k1
+; X32-NEXT: kxorq %k4, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $8, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $56, %k0, %k1
+; X32-NEXT: kxorq %k5, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $7, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $57, %k0, %k1
+; X32-NEXT: kxorq %k6, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $6, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $58, %k0, %k1
+; X32-NEXT: kxorq %k7, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $5, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $59, %k0, %k1
; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4]
-; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm6
-; X32-NEXT: vpblendvb %ymm5, %ymm6, %ymm1, %ymm1
-; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $4, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $60, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $28, %ecx
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm5
-; X32-NEXT: vpbroadcastd %xmm5, %xmm5
-; X32-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm5
-; X32-NEXT: vpblendvb %ymm4, %ymm1, %ymm5, %ymm1
-; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $3, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $61, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $29, %ecx
; X32-NEXT: andb $1, %cl
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm4
-; X32-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2]
-; X32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4
-; X32-NEXT: vpblendvb %ymm3, %ymm1, %ymm4, %ymm1
-; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $2, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $62, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $30, %ecx
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm3
-; X32-NEXT: vpbroadcastw %xmm3, %xmm3
-; X32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm3, %ymm1
-; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $1, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftlq $1, %k0, %k0
; X32-NEXT: kshiftrq $1, %k0, %k0
; X32-NEXT: shrl $31, %eax
; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: korq %k1, %k0, %k1
-; X32-NEXT: vmovdqa64 {{[0-9]+}}(%esp), %zmm0 # 64-byte Reload
-; X32-NEXT: vmovdqa64 {{[0-9]+}}(%esp), %zmm1 # 64-byte Reload
-; X32-NEXT: vptestmb %zmm1, %zmm0, %k0 {%k1}
-; X32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vptestmb %zmm0, %zmm1, %k0 {%k1}
+; X32-NEXT: kmovq %k0, (%esp)
+; X32-NEXT: movl (%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: leal -4(%ebp), %esp
+; X32-NEXT: leal -8(%ebp), %esp
+; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: popl %ebp
; X32-NEXT: vzeroupper
@@ -2882,719 +2355,541 @@ define i64 @test_mm512_mask_testn_epi8_m
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: pushl %ebx
-; X32-NEXT: andl $-64, %esp
-; X32-NEXT: subl $256, %esp # imm = 0x100
+; X32-NEXT: pushl %esi
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $8, %esp
+; X32-NEXT: .cfi_offset %esi, -16
; X32-NEXT: .cfi_offset %ebx, -12
-; X32-NEXT: vmovaps %zmm1, {{[0-9]+}}(%esp) # 64-byte Spill
-; X32-NEXT: vmovaps %zmm0, {{[0-9]+}}(%esp) # 64-byte Spill
-; X32-NEXT: movl 8(%ebp), %eax
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: andb $2, %cl
-; X32-NEXT: shrb %cl
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vpsllw $8, %xmm0, %xmm0
-; X32-NEXT: kmovd %eax, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: andb $15, %cl
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vpbroadcastw %xmm1, %xmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vpslld $24, %xmm1, %xmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrb $4, %cl
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vpbroadcastd %xmm1, %xmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrb $5, %cl
-; X32-NEXT: andb $1, %cl
+; X32-NEXT: movl 8(%ebp), %ecx
; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vpsllq $40, %xmm1, %xmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrb $6, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrb $7, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllq $56, %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movb %ah, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastq %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: andb $2, %cl
-; X32-NEXT: shrb %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movb %ah, %cl
-; X32-NEXT: andb $15, %cl
-; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: kshiftrq $1, %k0, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: andb $2, %al
+; X32-NEXT: shrb %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $62, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $2, %k0, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: andb $15, %al
+; X32-NEXT: movl %eax, %edx
; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $12, %ecx
-; X32-NEXT: andl $15, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastd %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $13, %ecx
-; X32-NEXT: andb $1, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $14, %ecx
-; X32-NEXT: andl $3, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $15, %ecx
-; X32-NEXT: andl $1, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $16, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: andb $2, %dl
-; X32-NEXT: shrb %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllw $8, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $61, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $3, %k0, %k1
+; X32-NEXT: shrb $3, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $60, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $4, %k0, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrb $4, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $59, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $5, %k0, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrb $5, %al
+; X32-NEXT: andb $1, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $58, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $6, %k0, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrb $6, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $57, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $7, %k0, %k1
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrb $7, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $56, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $8, %k0, %k1
+; X32-NEXT: movb %ch, %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: andb $2, %al
+; X32-NEXT: shrb %al
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: movb %ch, %al
+; X32-NEXT: andb $15, %al
+; X32-NEXT: movl %eax, %edx
+; X32-NEXT: shrb $2, %dl
+; X32-NEXT: kmovd %edx, %k3
+; X32-NEXT: shrb $3, %al
+; X32-NEXT: kmovd %eax, %k4
+; X32-NEXT: movl %ecx, %eax
+; X32-NEXT: shrl $13, %eax
+; X32-NEXT: andb $1, %al
+; X32-NEXT: kmovd %eax, %k5
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrl $16, %edx
+; X32-NEXT: movl %edx, %eax
+; X32-NEXT: andb $2, %al
+; X32-NEXT: shrb %al
+; X32-NEXT: kmovd %eax, %k6
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: andb $15, %bl
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: shrb $2, %al
+; X32-NEXT: kmovd %eax, %k7
+; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $55, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $9, %k0, %k1
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $54, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $10, %k0, %k1
+; X32-NEXT: kxorq %k3, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $53, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $11, %k0, %k1
+; X32-NEXT: kxorq %k4, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $52, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $12, %k0, %k1
+; X32-NEXT: movl %ecx, %esi
+; X32-NEXT: shrl $12, %esi
+; X32-NEXT: andl $15, %esi
+; X32-NEXT: kmovd %esi, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $51, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $13, %k0, %k1
+; X32-NEXT: kxorq %k5, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $50, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $14, %k0, %k1
+; X32-NEXT: movl %ecx, %esi
+; X32-NEXT: shrl $14, %esi
+; X32-NEXT: andl $3, %esi
+; X32-NEXT: kmovd %esi, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $49, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $15, %k0, %k1
+; X32-NEXT: movl %ecx, %esi
+; X32-NEXT: shrl $15, %esi
+; X32-NEXT: andl $1, %esi
+; X32-NEXT: kmovd %esi, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $48, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $16, %k0, %k1
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $47, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $17, %k0, %k1
+; X32-NEXT: kxorq %k6, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $46, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $18, %k0, %k1
+; X32-NEXT: kxorq %k7, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $45, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $19, %k0, %k1
+; X32-NEXT: shrb $3, %bl
+; X32-NEXT: kmovd %ebx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $44, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $20, %k0, %k1
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: shrb $4, %bl
+; X32-NEXT: kmovd %ebx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $43, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $21, %k0, %k1
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: shrb $5, %bl
+; X32-NEXT: andb $1, %bl
+; X32-NEXT: kmovd %ebx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $42, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $22, %k0, %k1
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: shrb $6, %bl
+; X32-NEXT: kmovd %ebx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $41, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $23, %k0, %k1
+; X32-NEXT: shrb $7, %dl
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $40, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $24, %k0, %k1
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrl $24, %edx
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $39, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $25, %k0, %k1
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: andb $2, %bl
+; X32-NEXT: shrb %bl
+; X32-NEXT: kmovd %ebx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $38, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $26, %k0, %k1
; X32-NEXT: andb $15, %dl
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: shrb $2, %bl
-; X32-NEXT: kmovd %ebx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ebx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $37, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $27, %k0, %k1
; X32-NEXT: shrb $3, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslld $24, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: shrb $4, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastd %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $36, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $28, %k0, %k1
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrl $28, %edx
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $35, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $29, %k0, %k1
; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: shrb $5, %dl
+; X32-NEXT: shrl $29, %edx
; X32-NEXT: andb $1, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllq $40, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: shrb $6, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: shrb $7, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllq $56, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $24, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastq %xmm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: andb $2, %dl
-; X32-NEXT: shrb %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: andb $15, %cl
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4]
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm5, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $28, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastd %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255]
-; X32-NEXT: vpblendvb %ymm4, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $29, %ecx
-; X32-NEXT: andb $1, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255]
-; X32-NEXT: vpblendvb %ymm3, %ymm0, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $30, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
-; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: shrl $31, %eax
-; X32-NEXT: kmovd %eax, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
-; X32-NEXT: vpblendvb %ymm7, %ymm1, %ymm0, %ymm0
-; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl 12(%ebp), %eax
-; X32-NEXT: kmovd %eax, %k1
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; X32-NEXT: vpmovm2b %k1, %zmm7
-; X32-NEXT: vmovdqa {{.*#+}} ymm6 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm6, %ymm1, %ymm7, %ymm1
-; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $34, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $30, %k0, %k1
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrl $30, %edx
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $33, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $31, %k0, %k1
+; X32-NEXT: shrl $31, %ecx
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $32, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $32, %k0, %k1
+; X32-NEXT: kmovd %eax, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $31, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $33, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: andb $2, %cl
; X32-NEXT: shrb %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllw $8, %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $30, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $34, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: andb $15, %cl
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $29, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $35, %k0, %k1
; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslld $24, %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $28, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $36, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrb $4, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastd %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $27, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $37, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrb $5, %cl
; X32-NEXT: andb $1, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllq $40, %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $26, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $38, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrb $6, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $25, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $39, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrb $7, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllq $56, %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $24, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $40, %k0, %k1
; X32-NEXT: movb %ah, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastq %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: andb $2, %cl
; X32-NEXT: shrb %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
; X32-NEXT: movb %ah, %cl
; X32-NEXT: andb $15, %cl
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k3
; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $12, %ecx
-; X32-NEXT: andl $15, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastd %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k4
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $13, %ecx
; X32-NEXT: andb $1, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $14, %ecx
-; X32-NEXT: andl $3, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $15, %ecx
-; X32-NEXT: andl $1, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k5
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $16, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: andb $2, %dl
; X32-NEXT: shrb %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllw $8, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k6
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: andb $15, %dl
; X32-NEXT: movl %edx, %ebx
; X32-NEXT: shrb $2, %bl
-; X32-NEXT: kmovd %ebx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ebx, %k7
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $23, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $41, %k0, %k1
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $22, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $42, %k0, %k1
+; X32-NEXT: kxorq %k3, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $21, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $43, %k0, %k1
+; X32-NEXT: kxorq %k4, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $20, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $44, %k0, %k1
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: shrl $12, %esi
+; X32-NEXT: andl $15, %esi
+; X32-NEXT: kmovd %esi, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $19, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $45, %k0, %k1
+; X32-NEXT: kxorq %k5, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $18, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $46, %k0, %k1
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: shrl $14, %esi
+; X32-NEXT: andl $3, %esi
+; X32-NEXT: kmovd %esi, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $17, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $47, %k0, %k1
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: shrl $15, %esi
+; X32-NEXT: andl $1, %esi
+; X32-NEXT: kmovd %esi, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $16, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $48, %k0, %k1
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $15, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $49, %k0, %k1
+; X32-NEXT: kxorq %k6, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $14, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $50, %k0, %k1
+; X32-NEXT: kxorq %k7, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $13, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $51, %k0, %k1
; X32-NEXT: shrb $3, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslld $24, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $12, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $52, %k0, %k1
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $4, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastd %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $5, %dl
; X32-NEXT: andb $1, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllq $40, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k2
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $6, %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %edx, %k3
; X32-NEXT: shrb $7, %cl
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpsllq $56, %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k4
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $24, %ecx
-; X32-NEXT: kmovd %ecx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpbroadcastq %xmm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k5
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: andb $2, %dl
; X32-NEXT: shrb %dl
-; X32-NEXT: kmovd %edx, %k1
-; X32-NEXT: vpmovm2b %k1, %zmm0
-; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: kmovd %edx, %k6
; X32-NEXT: andb $15, %cl
; X32-NEXT: movl %ecx, %edx
; X32-NEXT: shrb $2, %dl
-; X32-NEXT: kmovd %edx, %k0
-; X32-NEXT: vpmovb2m %zmm0, %k1
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vpbroadcastw %xmm0, %xmm0
-; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT: vpmovm2b %k1, %zmm1
-; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
-; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255]
-; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: kmovd %edx, %k7
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $11, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $53, %k0, %k1
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $10, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $54, %k0, %k1
+; X32-NEXT: kxorq %k3, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $9, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $55, %k0, %k1
+; X32-NEXT: kxorq %k4, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $8, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $56, %k0, %k1
+; X32-NEXT: kxorq %k5, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $7, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $57, %k0, %k1
+; X32-NEXT: kxorq %k6, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $6, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $58, %k0, %k1
+; X32-NEXT: kxorq %k7, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $5, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $59, %k0, %k1
; X32-NEXT: shrb $3, %cl
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm1
-; X32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4]
-; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm6
-; X32-NEXT: vpblendvb %ymm5, %ymm6, %ymm1, %ymm1
-; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $4, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $60, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $28, %ecx
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm5
-; X32-NEXT: vpbroadcastd %xmm5, %xmm5
-; X32-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm5
-; X32-NEXT: vpblendvb %ymm4, %ymm1, %ymm5, %ymm1
-; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $3, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $61, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $29, %ecx
; X32-NEXT: andb $1, %cl
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm4
-; X32-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2]
-; X32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4
-; X32-NEXT: vpblendvb %ymm3, %ymm1, %ymm4, %ymm1
-; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm0
-; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $2, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
+; X32-NEXT: kshiftrq $62, %k0, %k1
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: shrl $30, %ecx
-; X32-NEXT: kmovd %ecx, %k0
-; X32-NEXT: vpmovm2b %k0, %zmm3
-; X32-NEXT: vpbroadcastw %xmm3, %xmm3
-; X32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
-; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm3, %ymm1
-; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kmovd %ecx, %k2
+; X32-NEXT: kxorq %k2, %k1, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: kshiftrq $1, %k1, %k1
+; X32-NEXT: kxorq %k0, %k1, %k0
; X32-NEXT: kshiftlq $1, %k0, %k0
; X32-NEXT: kshiftrq $1, %k0, %k0
; X32-NEXT: shrl $31, %eax
; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: kshiftlq $63, %k1, %k1
; X32-NEXT: korq %k1, %k0, %k1
-; X32-NEXT: vmovdqa64 {{[0-9]+}}(%esp), %zmm0 # 64-byte Reload
-; X32-NEXT: vmovdqa64 {{[0-9]+}}(%esp), %zmm1 # 64-byte Reload
-; X32-NEXT: vptestnmb %zmm1, %zmm0, %k0 {%k1}
-; X32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vptestnmb %zmm0, %zmm1, %k0 {%k1}
+; X32-NEXT: kmovq %k0, (%esp)
+; X32-NEXT: movl (%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: leal -4(%ebp), %esp
+; X32-NEXT: leal -8(%ebp), %esp
+; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
; X32-NEXT: popl %ebp
; X32-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll?rev=320120&r1=320119&r2=320120&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll Thu Dec 7 16:16:09 2017
@@ -1795,753 +1795,574 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: pushl %esi
; AVX512F-32-NEXT: .cfi_def_cfa_offset 12
-; AVX512F-32-NEXT: subl $60, %esp
-; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
+; AVX512F-32-NEXT: subl $68, %esp
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 80
; AVX512F-32-NEXT: .cfi_offset %esi, -12
; AVX512F-32-NEXT: .cfi_offset %ebx, -8
-; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm6
-; AVX512F-32-NEXT: vmovdqa64 %zmm0, %zmm5
-; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrb $5, %al
-; AVX512F-32-NEXT: andb $1, %al
-; AVX512F-32-NEXT: movl %ecx, %ebx
-; AVX512F-32-NEXT: andb $15, %bl
-; AVX512F-32-NEXT: movl %ecx, %edx
-; AVX512F-32-NEXT: andb $2, %dl
-; AVX512F-32-NEXT: shrb %dl
-; AVX512F-32-NEXT: kmovd %edx, %k0
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; AVX512F-32-NEXT: movl %ebx, %eax
+; AVX512F-32-NEXT: shrl $16, %eax
; AVX512F-32-NEXT: movl %ebx, %edx
-; AVX512F-32-NEXT: shrb $2, %bl
-; AVX512F-32-NEXT: kmovd %ebx, %k1
-; AVX512F-32-NEXT: movl %ecx, %ebx
-; AVX512F-32-NEXT: shrb $4, %bl
-; AVX512F-32-NEXT: shrb $3, %dl
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2
-; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2
-; AVX512F-32-NEXT: kmovd %ecx, %k0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3
-; AVX512F-32-NEXT: vpbroadcastw %xmm3, %xmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2
-; AVX512F-32-NEXT: kmovd %edx, %k0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vpslld $24, %xmm3, %xmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2
-; AVX512F-32-NEXT: kmovd %ebx, %k0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vpbroadcastd %xmm3, %xmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2
-; AVX512F-32-NEXT: kmovd %eax, %k0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vpsllq $40, %xmm3, %xmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7]
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrb $6, %al
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrb $7, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movb %ch, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpbroadcastq %xmm2, %xmm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: andb $2, %al
-; AVX512F-32-NEXT: shrb %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6]
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movb %ch, %dl
; AVX512F-32-NEXT: andb $15, %dl
-; AVX512F-32-NEXT: movl %edx, %eax
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: andb $2, %cl
+; AVX512F-32-NEXT: shrb %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
+; AVX512F-32-NEXT: movl %edx, %ecx
; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: shrb $3, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4]
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $12, %eax
-; AVX512F-32-NEXT: andl $15, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $13, %eax
-; AVX512F-32-NEXT: andb $1, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2]
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $14, %eax
-; AVX512F-32-NEXT: andl $3, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $15, %eax
-; AVX512F-32-NEXT: andl $1, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $16, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
+; AVX512F-32-NEXT: kmovd %edx, %k2
+; AVX512F-32-NEXT: movb %bh, %dl
+; AVX512F-32-NEXT: andb $15, %dl
+; AVX512F-32-NEXT: shrb $3, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k0
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: shrb $4, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k3
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: shrb $5, %cl
+; AVX512F-32-NEXT: andb $1, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k4
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: shrb $6, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k6
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: shrb $7, %cl
+; AVX512F-32-NEXT: kmovd %ebx, %k5
+; AVX512F-32-NEXT: kshiftrq $1, %k5, %k7
+; AVX512F-32-NEXT: kxorq %k1, %k7, %k1
+; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $62, %k1, %k1
+; AVX512F-32-NEXT: kxorq %k5, %k1, %k7
+; AVX512F-32-NEXT: kshiftrq $2, %k7, %k1
+; AVX512F-32-NEXT: kxorq %k2, %k1, %k2
+; AVX512F-32-NEXT: kmovd %ecx, %k5
+; AVX512F-32-NEXT: movb %bh, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
+; AVX512F-32-NEXT: movl %ebx, %esi
+; AVX512F-32-NEXT: andb $2, %cl
+; AVX512F-32-NEXT: shrb %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2
+; AVX512F-32-NEXT: kshiftrq $61, %k2, %k2
+; AVX512F-32-NEXT: kxorq %k7, %k2, %k7
+; AVX512F-32-NEXT: kshiftrq $3, %k7, %k2
+; AVX512F-32-NEXT: kxorq %k0, %k2, %k0
+; AVX512F-32-NEXT: kmovd %ecx, %k2
+; AVX512F-32-NEXT: movl %edx, %ecx
+; AVX512F-32-NEXT: shrb $2, %dl
+; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0
+; AVX512F-32-NEXT: kshiftrq $60, %k0, %k0
+; AVX512F-32-NEXT: kxorq %k7, %k0, %k0
+; AVX512F-32-NEXT: kshiftrq $4, %k0, %k7
+; AVX512F-32-NEXT: kxorq %k3, %k7, %k7
+; AVX512F-32-NEXT: kmovd %edx, %k3
; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $2, %dl
-; AVX512F-32-NEXT: shrb %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
+; AVX512F-32-NEXT: andb $15, %dl
+; AVX512F-32-NEXT: shrb $3, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $59, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k0, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $5, %k7, %k0
+; AVX512F-32-NEXT: kxorq %k4, %k0, %k4
+; AVX512F-32-NEXT: kmovd %ecx, %k0
+; AVX512F-32-NEXT: movl %esi, %ecx
+; AVX512F-32-NEXT: shrl $13, %ecx
+; AVX512F-32-NEXT: andb $1, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
+; AVX512F-32-NEXT: kshiftrq $58, %k4, %k4
+; AVX512F-32-NEXT: kxorq %k7, %k4, %k7
+; AVX512F-32-NEXT: kshiftrq $6, %k7, %k4
+; AVX512F-32-NEXT: kxorq %k6, %k4, %k6
+; AVX512F-32-NEXT: kmovd %ecx, %k4
; AVX512F-32-NEXT: movl %eax, %ebx
-; AVX512F-32-NEXT: andb $15, %bl
-; AVX512F-32-NEXT: movl %ebx, %edx
-; AVX512F-32-NEXT: shrb $2, %bl
-; AVX512F-32-NEXT: kmovd %ebx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: shrb $3, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpslld $24, %xmm2, %xmm2
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: shrb $4, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: shrb $5, %dl
-; AVX512F-32-NEXT: andb $1, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpsllq $40, %xmm2, %xmm2
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
+; AVX512F-32-NEXT: andb $2, %bl
+; AVX512F-32-NEXT: shrb %bl
+; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6
+; AVX512F-32-NEXT: kshiftrq $57, %k6, %k6
+; AVX512F-32-NEXT: kxorq %k7, %k6, %k6
+; AVX512F-32-NEXT: kshiftrq $7, %k6, %k7
+; AVX512F-32-NEXT: kxorq %k5, %k7, %k7
+; AVX512F-32-NEXT: kmovd %ebx, %k5
+; AVX512F-32-NEXT: movl %edx, %ecx
+; AVX512F-32-NEXT: shrb $2, %dl
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $56, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k6, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $8, %k7, %k6
+; AVX512F-32-NEXT: kxorq %k1, %k6, %k1
+; AVX512F-32-NEXT: kmovd %edx, %k6
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; AVX512F-32-NEXT: shrb $3, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $55, %k1, %k1
+; AVX512F-32-NEXT: kxorq %k7, %k1, %k7
+; AVX512F-32-NEXT: kshiftrq $9, %k7, %k1
+; AVX512F-32-NEXT: kxorq %k2, %k1, %k2
+; AVX512F-32-NEXT: kmovd %ecx, %k1
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: shrb $4, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2
+; AVX512F-32-NEXT: kshiftrq $54, %k2, %k2
+; AVX512F-32-NEXT: kxorq %k7, %k2, %k7
+; AVX512F-32-NEXT: kshiftrq $10, %k7, %k2
+; AVX512F-32-NEXT: kxorq %k3, %k2, %k3
+; AVX512F-32-NEXT: kmovd %ecx, %k2
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: shrb $5, %cl
+; AVX512F-32-NEXT: andb $1, %cl
+; AVX512F-32-NEXT: movl %esi, %edx
+; AVX512F-32-NEXT: shrl $12, %edx
+; AVX512F-32-NEXT: andl $15, %edx
+; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3
+; AVX512F-32-NEXT: kshiftrq $53, %k3, %k3
+; AVX512F-32-NEXT: kxorq %k7, %k3, %k3
+; AVX512F-32-NEXT: kshiftrq $11, %k3, %k7
+; AVX512F-32-NEXT: kxorq %k0, %k7, %k0
+; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0
+; AVX512F-32-NEXT: kshiftrq $52, %k0, %k0
+; AVX512F-32-NEXT: kxorq %k3, %k0, %k3
+; AVX512F-32-NEXT: kshiftrq $12, %k3, %k0
+; AVX512F-32-NEXT: kmovd %edx, %k7
+; AVX512F-32-NEXT: kxorq %k7, %k0, %k7
+; AVX512F-32-NEXT: kmovd %ecx, %k0
; AVX512F-32-NEXT: movl %eax, %edx
; AVX512F-32-NEXT: shrb $6, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
+; AVX512F-32-NEXT: movl %esi, %ecx
+; AVX512F-32-NEXT: shrl $14, %ecx
+; AVX512F-32-NEXT: andl $3, %ecx
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $51, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k3, %k7, %k3
+; AVX512F-32-NEXT: kshiftrq $13, %k3, %k7
+; AVX512F-32-NEXT: kxorq %k4, %k7, %k4
+; AVX512F-32-NEXT: kmovd %ecx, %k7
+; AVX512F-32-NEXT: movl %esi, %ecx
+; AVX512F-32-NEXT: shrl $15, %ecx
+; AVX512F-32-NEXT: andl $1, %ecx
+; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
+; AVX512F-32-NEXT: kshiftrq $50, %k4, %k4
+; AVX512F-32-NEXT: kxorq %k3, %k4, %k3
+; AVX512F-32-NEXT: kshiftrq $14, %k3, %k4
+; AVX512F-32-NEXT: kxorq %k7, %k4, %k4
+; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
+; AVX512F-32-NEXT: kshiftrq $49, %k4, %k4
+; AVX512F-32-NEXT: kxorq %k3, %k4, %k3
+; AVX512F-32-NEXT: kshiftrq $15, %k3, %k4
+; AVX512F-32-NEXT: kmovd %ecx, %k7
+; AVX512F-32-NEXT: kxorq %k7, %k4, %k4
+; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
+; AVX512F-32-NEXT: kshiftrq $48, %k4, %k4
+; AVX512F-32-NEXT: kxorq %k3, %k4, %k3
+; AVX512F-32-NEXT: kshiftrq $16, %k3, %k4
+; AVX512F-32-NEXT: kmovd %eax, %k7
+; AVX512F-32-NEXT: kxorq %k7, %k4, %k4
+; AVX512F-32-NEXT: kmovd %edx, %k7
+; AVX512F-32-NEXT: movl %esi, %edx
+; AVX512F-32-NEXT: shrl $24, %edx
; AVX512F-32-NEXT: # kill: def %al killed %al killed %eax def %eax
; AVX512F-32-NEXT: shrb $7, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $24, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpbroadcastq %xmm2, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $2, %dl
-; AVX512F-32-NEXT: shrb %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6]
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $15, %dl
+; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
+; AVX512F-32-NEXT: kshiftrq $47, %k4, %k4
+; AVX512F-32-NEXT: kxorq %k3, %k4, %k3
+; AVX512F-32-NEXT: kshiftrq $17, %k3, %k4
+; AVX512F-32-NEXT: kxorq %k5, %k4, %k4
+; AVX512F-32-NEXT: kmovd %eax, %k5
; AVX512F-32-NEXT: movl %edx, %eax
-; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: shrb $3, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4]
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm3, %ymm4, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm4[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $28, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm4, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm4[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: movl %ecx, %esi
-; AVX512F-32-NEXT: shrl $29, %eax
-; AVX512F-32-NEXT: andb $1, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2]
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm2[0,1,2,3],zmm0[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %esi, %eax
-; AVX512F-32-NEXT: shrl $30, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0
-; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
-; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %esi, %eax
-; AVX512F-32-NEXT: shrl $31, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
-; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm1, %ymm0, %ymm0
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; AVX512F-32-NEXT: kmovd %ecx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm7
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm1, %ymm7, %ymm1
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andb $2, %al
-; AVX512F-32-NEXT: shrb %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpsllw $8, %xmm0, %xmm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %edx
+; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
+; AVX512F-32-NEXT: kshiftrq $46, %k4, %k4
+; AVX512F-32-NEXT: kxorq %k3, %k4, %k4
+; AVX512F-32-NEXT: kshiftrq $18, %k4, %k3
+; AVX512F-32-NEXT: kxorq %k6, %k3, %k6
+; AVX512F-32-NEXT: kmovd %edx, %k3
+; AVX512F-32-NEXT: # kill: def %dl killed %dl killed %edx def %edx
; AVX512F-32-NEXT: andb $15, %dl
-; AVX512F-32-NEXT: movl %edx, %eax
-; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: shrb $3, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpslld $24, %xmm0, %xmm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrb $4, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpbroadcastd %xmm0, %xmm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrb $5, %al
-; AVX512F-32-NEXT: andb $1, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpsllq $40, %xmm0, %xmm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrb $6, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrb $7, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpsllq $56, %xmm0, %xmm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movb %ch, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpbroadcastq %xmm0, %xmm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
; AVX512F-32-NEXT: andb $2, %al
; AVX512F-32-NEXT: shrb %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movb %ch, %dl
-; AVX512F-32-NEXT: andb $15, %dl
-; AVX512F-32-NEXT: movl %edx, %eax
+; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6
+; AVX512F-32-NEXT: kshiftrq $45, %k6, %k6
+; AVX512F-32-NEXT: kxorq %k4, %k6, %k6
+; AVX512F-32-NEXT: kshiftrq $19, %k6, %k4
+; AVX512F-32-NEXT: kxorq %k1, %k4, %k1
+; AVX512F-32-NEXT: kmovd %eax, %k4
+; AVX512F-32-NEXT: movl %edx, %ecx
; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: shrb $3, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4]
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $12, %eax
-; AVX512F-32-NEXT: andl $15, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpbroadcastd %xmm0, %xmm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $13, %eax
-; AVX512F-32-NEXT: andb $1, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $14, %eax
-; AVX512F-32-NEXT: andl $3, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $15, %eax
-; AVX512F-32-NEXT: andl $1, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %ebx
-; AVX512F-32-NEXT: shrl $16, %ebx
-; AVX512F-32-NEXT: kmovd %ebx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
+; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $44, %k1, %k1
+; AVX512F-32-NEXT: kxorq %k6, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $20, %k1, %k6
+; AVX512F-32-NEXT: kxorq %k2, %k6, %k6
+; AVX512F-32-NEXT: kmovd %edx, %k2
+; AVX512F-32-NEXT: movl %ebx, %eax
+; AVX512F-32-NEXT: andb $15, %al
+; AVX512F-32-NEXT: shrb $3, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6
+; AVX512F-32-NEXT: kshiftrq $43, %k6, %k6
+; AVX512F-32-NEXT: kxorq %k1, %k6, %k1
+; AVX512F-32-NEXT: kshiftrq $21, %k1, %k6
+; AVX512F-32-NEXT: kxorq %k0, %k6, %k6
+; AVX512F-32-NEXT: kmovd %ecx, %k0
+; AVX512F-32-NEXT: movl %esi, %ecx
+; AVX512F-32-NEXT: shrl $29, %ecx
+; AVX512F-32-NEXT: andb $1, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6
+; AVX512F-32-NEXT: kshiftrq $42, %k6, %k6
+; AVX512F-32-NEXT: kxorq %k1, %k6, %k6
+; AVX512F-32-NEXT: kshiftrq $22, %k6, %k1
+; AVX512F-32-NEXT: kxorq %k7, %k1, %k7
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: movl %ebx, %edx
; AVX512F-32-NEXT: andb $2, %dl
; AVX512F-32-NEXT: shrb %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpsllw $8, %xmm0, %xmm0
-; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: movl %ebx, %eax
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $41, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
+; AVX512F-32-NEXT: kshiftrq $23, %k6, %k7
+; AVX512F-32-NEXT: kxorq %k5, %k7, %k7
+; AVX512F-32-NEXT: kmovd %edx, %k5
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: shrb $2, %al
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $40, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k6, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $24, %k7, %k6
+; AVX512F-32-NEXT: kxorq %k3, %k6, %k3
+; AVX512F-32-NEXT: kmovd %eax, %k6
+; AVX512F-32-NEXT: movb %bh, %al
; AVX512F-32-NEXT: andb $15, %al
-; AVX512F-32-NEXT: movl %eax, %edx
+; AVX512F-32-NEXT: shrb $3, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3
+; AVX512F-32-NEXT: kshiftrq $39, %k3, %k3
+; AVX512F-32-NEXT: kxorq %k7, %k3, %k7
+; AVX512F-32-NEXT: kshiftrq $25, %k7, %k3
+; AVX512F-32-NEXT: kxorq %k4, %k3, %k4
+; AVX512F-32-NEXT: kmovd %ecx, %k3
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: shrb $4, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
+; AVX512F-32-NEXT: kshiftrq $38, %k4, %k4
+; AVX512F-32-NEXT: kxorq %k7, %k4, %k7
+; AVX512F-32-NEXT: kshiftrq $26, %k7, %k4
+; AVX512F-32-NEXT: kxorq %k2, %k4, %k2
+; AVX512F-32-NEXT: kmovd %ecx, %k4
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: shrb $5, %cl
+; AVX512F-32-NEXT: andb $1, %cl
+; AVX512F-32-NEXT: movl %esi, %edx
+; AVX512F-32-NEXT: shrl $28, %edx
+; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2
+; AVX512F-32-NEXT: kshiftrq $37, %k2, %k2
+; AVX512F-32-NEXT: kxorq %k7, %k2, %k2
+; AVX512F-32-NEXT: kshiftrq $27, %k2, %k7
+; AVX512F-32-NEXT: kxorq %k0, %k7, %k0
+; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0
+; AVX512F-32-NEXT: kshiftrq $36, %k0, %k0
+; AVX512F-32-NEXT: kxorq %k2, %k0, %k2
+; AVX512F-32-NEXT: kshiftrq $28, %k2, %k0
+; AVX512F-32-NEXT: kmovd %edx, %k7
+; AVX512F-32-NEXT: kxorq %k7, %k0, %k7
+; AVX512F-32-NEXT: kmovd %ecx, %k0
+; AVX512F-32-NEXT: movl %ebx, %edx
+; AVX512F-32-NEXT: shrb $6, %dl
+; AVX512F-32-NEXT: movl %esi, %ecx
+; AVX512F-32-NEXT: shrl $30, %ecx
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $35, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k2, %k7, %k2
+; AVX512F-32-NEXT: kshiftrq $29, %k2, %k7
+; AVX512F-32-NEXT: kxorq %k1, %k7, %k1
+; AVX512F-32-NEXT: kmovd %ecx, %k7
+; AVX512F-32-NEXT: movl %esi, %ecx
+; AVX512F-32-NEXT: shrl $31, %ecx
+; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $34, %k1, %k1
+; AVX512F-32-NEXT: kxorq %k2, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $30, %k1, %k2
+; AVX512F-32-NEXT: kxorq %k7, %k2, %k2
+; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2
+; AVX512F-32-NEXT: kshiftrq $33, %k2, %k2
+; AVX512F-32-NEXT: kxorq %k1, %k2, %k1
+; AVX512F-32-NEXT: kshiftrq $31, %k1, %k2
+; AVX512F-32-NEXT: kmovd %ecx, %k7
+; AVX512F-32-NEXT: kxorq %k7, %k2, %k2
+; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2
+; AVX512F-32-NEXT: kshiftrq $32, %k2, %k2
+; AVX512F-32-NEXT: kxorq %k1, %k2, %k1
+; AVX512F-32-NEXT: kshiftrq $32, %k1, %k2
+; AVX512F-32-NEXT: kmovd %ebx, %k7
+; AVX512F-32-NEXT: kxorq %k7, %k2, %k2
+; AVX512F-32-NEXT: kmovd %edx, %k7
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: shrb $7, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2
+; AVX512F-32-NEXT: kshiftrq $31, %k2, %k2
+; AVX512F-32-NEXT: kxorq %k1, %k2, %k1
+; AVX512F-32-NEXT: kshiftrq $33, %k1, %k2
+; AVX512F-32-NEXT: kxorq %k5, %k2, %k2
+; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2
+; AVX512F-32-NEXT: kshiftrq $30, %k2, %k2
+; AVX512F-32-NEXT: kxorq %k1, %k2, %k2
+; AVX512F-32-NEXT: kshiftrq $34, %k2, %k1
+; AVX512F-32-NEXT: kxorq %k6, %k1, %k5
+; AVX512F-32-NEXT: kmovd %ecx, %k6
+; AVX512F-32-NEXT: movb %bh, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
+; AVX512F-32-NEXT: andb $2, %cl
+; AVX512F-32-NEXT: shrb %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k5, %k5
+; AVX512F-32-NEXT: kshiftrq $29, %k5, %k5
+; AVX512F-32-NEXT: kxorq %k2, %k5, %k5
+; AVX512F-32-NEXT: kshiftrq $35, %k5, %k2
+; AVX512F-32-NEXT: kxorq %k3, %k2, %k3
+; AVX512F-32-NEXT: kmovd %ecx, %k2
+; AVX512F-32-NEXT: movl %eax, %ecx
; AVX512F-32-NEXT: shrb $2, %al
-; AVX512F-32-NEXT: kmovd %eax, %k0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0
-; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0
-; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: shrb $3, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpslld $24, %xmm0, %xmm0
-; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ebx, %eax
-; AVX512F-32-NEXT: shrb $4, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpbroadcastd %xmm0, %xmm0
-; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
+; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3
+; AVX512F-32-NEXT: kshiftrq $28, %k3, %k3
+; AVX512F-32-NEXT: kxorq %k5, %k3, %k5
+; AVX512F-32-NEXT: kshiftrq $36, %k5, %k3
+; AVX512F-32-NEXT: kxorq %k4, %k3, %k4
+; AVX512F-32-NEXT: kmovd %eax, %k3
; AVX512F-32-NEXT: movl %ebx, %eax
-; AVX512F-32-NEXT: shrb $5, %al
-; AVX512F-32-NEXT: andb $1, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpsllq $40, %xmm0, %xmm0
-; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ebx, %eax
-; AVX512F-32-NEXT: shrb $6, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0
-; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: # kill: def %bl killed %bl killed %ebx def %ebx
-; AVX512F-32-NEXT: shrb $7, %bl
-; AVX512F-32-NEXT: kmovd %ebx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpsllq $56, %xmm0, %xmm0
-; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $24, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpbroadcastq %xmm0, %ymm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
+; AVX512F-32-NEXT: shrl $16, %eax
+; AVX512F-32-NEXT: shrb $3, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
+; AVX512F-32-NEXT: kshiftrq $27, %k4, %k4
+; AVX512F-32-NEXT: kxorq %k5, %k4, %k5
+; AVX512F-32-NEXT: kshiftrq $37, %k5, %k4
+; AVX512F-32-NEXT: kxorq %k0, %k4, %k0
+; AVX512F-32-NEXT: kmovd %ecx, %k4
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: shrl $13, %ecx
+; AVX512F-32-NEXT: andb $1, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0
+; AVX512F-32-NEXT: kshiftrq $26, %k0, %k0
+; AVX512F-32-NEXT: kxorq %k5, %k0, %k0
+; AVX512F-32-NEXT: kshiftrq $38, %k0, %k5
+; AVX512F-32-NEXT: kxorq %k7, %k5, %k7
+; AVX512F-32-NEXT: kmovd %ecx, %k5
; AVX512F-32-NEXT: movl %eax, %edx
; AVX512F-32-NEXT: andb $2, %dl
; AVX512F-32-NEXT: shrb %dl
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $25, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k0, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $39, %k7, %k0
+; AVX512F-32-NEXT: kxorq %k6, %k0, %k6
+; AVX512F-32-NEXT: kmovd %edx, %k0
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) # 8-byte Spill
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: andb $15, %cl
+; AVX512F-32-NEXT: movl %ecx, %edx
+; AVX512F-32-NEXT: shrb $2, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6
+; AVX512F-32-NEXT: kshiftrq $24, %k6, %k6
+; AVX512F-32-NEXT: kxorq %k7, %k6, %k6
+; AVX512F-32-NEXT: kshiftrq $40, %k6, %k7
+; AVX512F-32-NEXT: kxorq %k1, %k7, %k7
+; AVX512F-32-NEXT: kmovd %ecx, %k1
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: shrl $12, %ecx
+; AVX512F-32-NEXT: andl $15, %ecx
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $23, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
+; AVX512F-32-NEXT: kshiftrq $41, %k6, %k7
+; AVX512F-32-NEXT: kxorq %k2, %k7, %k2
+; AVX512F-32-NEXT: kmovd %ecx, %k0
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: shrl $14, %ecx
+; AVX512F-32-NEXT: andl $3, %ecx
+; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2
+; AVX512F-32-NEXT: kshiftrq $22, %k2, %k2
+; AVX512F-32-NEXT: kxorq %k6, %k2, %k2
+; AVX512F-32-NEXT: kshiftrq $42, %k2, %k6
+; AVX512F-32-NEXT: kxorq %k3, %k6, %k3
+; AVX512F-32-NEXT: kmovd %ecx, %k7
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: shrl $15, %ecx
+; AVX512F-32-NEXT: andl $1, %ecx
+; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3
+; AVX512F-32-NEXT: kshiftrq $21, %k3, %k3
+; AVX512F-32-NEXT: kxorq %k2, %k3, %k2
+; AVX512F-32-NEXT: kshiftrq $43, %k2, %k3
+; AVX512F-32-NEXT: kxorq %k4, %k3, %k3
+; AVX512F-32-NEXT: kmovd %ecx, %k6
+; AVX512F-32-NEXT: shrb $3, %dl
+; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3
+; AVX512F-32-NEXT: kshiftrq $20, %k3, %k3
+; AVX512F-32-NEXT: kxorq %k2, %k3, %k3
+; AVX512F-32-NEXT: kshiftrq $44, %k3, %k2
+; AVX512F-32-NEXT: kxorq %k0, %k2, %k0
+; AVX512F-32-NEXT: kmovd %edx, %k2
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: shrb $4, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0
+; AVX512F-32-NEXT: kshiftrq $19, %k0, %k0
+; AVX512F-32-NEXT: kxorq %k3, %k0, %k0
+; AVX512F-32-NEXT: kshiftrq $45, %k0, %k3
+; AVX512F-32-NEXT: kxorq %k5, %k3, %k4
+; AVX512F-32-NEXT: kmovd %ecx, %k3
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: shrb $5, %cl
+; AVX512F-32-NEXT: andb $1, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
+; AVX512F-32-NEXT: kshiftrq $18, %k4, %k4
+; AVX512F-32-NEXT: kxorq %k0, %k4, %k0
+; AVX512F-32-NEXT: kshiftrq $46, %k0, %k4
+; AVX512F-32-NEXT: kxorq %k7, %k4, %k5
+; AVX512F-32-NEXT: kmovd %ecx, %k4
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: shrb $6, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k5, %k5
+; AVX512F-32-NEXT: kshiftrq $17, %k5, %k5
+; AVX512F-32-NEXT: kxorq %k0, %k5, %k0
+; AVX512F-32-NEXT: kshiftrq $47, %k0, %k5
+; AVX512F-32-NEXT: kxorq %k6, %k5, %k5
+; AVX512F-32-NEXT: kshiftlq $63, %k5, %k5
+; AVX512F-32-NEXT: kshiftrq $16, %k5, %k5
+; AVX512F-32-NEXT: kxorq %k0, %k5, %k0
+; AVX512F-32-NEXT: kshiftrq $48, %k0, %k5
+; AVX512F-32-NEXT: kmovd %eax, %k6
+; AVX512F-32-NEXT: kxorq %k6, %k5, %k6
+; AVX512F-32-NEXT: kmovd %ecx, %k5
+; AVX512F-32-NEXT: movl %ebx, %edx
+; AVX512F-32-NEXT: shrl $24, %edx
+; AVX512F-32-NEXT: # kill: def %al killed %al killed %eax def %eax
+; AVX512F-32-NEXT: shrb $7, %al
+; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6
+; AVX512F-32-NEXT: kshiftrq $15, %k6, %k6
+; AVX512F-32-NEXT: kxorq %k0, %k6, %k6
+; AVX512F-32-NEXT: kshiftrq $49, %k6, %k0
+; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k7 # 8-byte Reload
+; AVX512F-32-NEXT: kxorq %k7, %k0, %k7
+; AVX512F-32-NEXT: kmovd %eax, %k0
+; AVX512F-32-NEXT: movl %edx, %eax
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $14, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
+; AVX512F-32-NEXT: kshiftrq $50, %k6, %k7
+; AVX512F-32-NEXT: kxorq %k1, %k7, %k7
; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
-; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: movl %eax, %edx
+; AVX512F-32-NEXT: # kill: def %dl killed %dl killed %edx def %edx
; AVX512F-32-NEXT: andb $15, %dl
+; AVX512F-32-NEXT: andb $2, %al
+; AVX512F-32-NEXT: shrb %al
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $13, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
+; AVX512F-32-NEXT: kshiftrq $51, %k6, %k7
+; AVX512F-32-NEXT: kxorq %k2, %k7, %k7
+; AVX512F-32-NEXT: kmovd %eax, %k2
; AVX512F-32-NEXT: movl %edx, %eax
; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0
-; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0
-; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $12, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
+; AVX512F-32-NEXT: kshiftrq $52, %k6, %k7
+; AVX512F-32-NEXT: kxorq %k3, %k7, %k7
+; AVX512F-32-NEXT: kmovd %edx, %k3
; AVX512F-32-NEXT: shrb $3, %al
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0
-; AVX512F-32-NEXT: kmovd %eax, %k0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4]
-; AVX512F-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm4
-; AVX512F-32-NEXT: vpblendvb %ymm3, %ymm4, %ymm1, %ymm1
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $11, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
+; AVX512F-32-NEXT: kshiftrq $53, %k6, %k7
+; AVX512F-32-NEXT: kxorq %k4, %k7, %k7
+; AVX512F-32-NEXT: kmovd %eax, %k4
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $29, %eax
; AVX512F-32-NEXT: andb $1, %al
-; AVX512F-32-NEXT: kmovd %eax, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $10, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
+; AVX512F-32-NEXT: kshiftrq $54, %k6, %k7
+; AVX512F-32-NEXT: kxorq %k5, %k7, %k7
+; AVX512F-32-NEXT: kmovd %eax, %k5
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $9, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
+; AVX512F-32-NEXT: kshiftrq $55, %k6, %k7
+; AVX512F-32-NEXT: kxorq %k0, %k7, %k0
+; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0
+; AVX512F-32-NEXT: kshiftrq $8, %k0, %k0
+; AVX512F-32-NEXT: kxorq %k6, %k0, %k0
+; AVX512F-32-NEXT: kshiftrq $56, %k0, %k6
+; AVX512F-32-NEXT: kxorq %k1, %k6, %k1
+; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $7, %k1, %k1
+; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
+; AVX512F-32-NEXT: kshiftrq $57, %k0, %k1
+; AVX512F-32-NEXT: kxorq %k2, %k1, %k1
+; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $6, %k1, %k1
+; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
+; AVX512F-32-NEXT: kshiftrq $58, %k0, %k1
+; AVX512F-32-NEXT: kxorq %k3, %k1, %k1
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $28, %eax
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3
-; AVX512F-32-NEXT: vpbroadcastd %xmm3, %xmm3
-; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm1, %ymm3, %ymm1
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2]
-; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm1, %ymm3, %ymm1
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $30, %eax
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512F-32-NEXT: kmovd %eax, %k0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vpbroadcastw %xmm3, %xmm3
-; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
-; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm1, %ymm3, %ymm1
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $5, %k1, %k1
+; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
+; AVX512F-32-NEXT: kshiftrq $59, %k0, %k1
+; AVX512F-32-NEXT: kxorq %k4, %k1, %k1
+; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $4, %k1, %k1
+; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
+; AVX512F-32-NEXT: kshiftrq $60, %k0, %k1
+; AVX512F-32-NEXT: kmovd %eax, %k2
+; AVX512F-32-NEXT: kxorq %k2, %k1, %k1
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $31, %eax
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: shrl $30, %ecx
+; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $3, %k1, %k1
+; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
+; AVX512F-32-NEXT: kshiftrq $61, %k0, %k1
+; AVX512F-32-NEXT: kxorq %k5, %k1, %k1
+; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $2, %k1, %k1
+; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
+; AVX512F-32-NEXT: kshiftrq $62, %k0, %k1
+; AVX512F-32-NEXT: kmovd %ecx, %k2
+; AVX512F-32-NEXT: kxorq %k2, %k1, %k1
+; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $1, %k1, %k1
+; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kshiftlq $1, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $1, %k0, %k0
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: korq %k1, %k0, %k1
-; AVX512F-32-NEXT: vpcmpeqb %zmm6, %zmm5, %k0 {%k1}
-; AVX512F-32-NEXT: vpcmpgtb %zmm5, %zmm6, %k2 {%k1}
-; AVX512F-32-NEXT: vpcmpleb %zmm6, %zmm5, %k3 {%k1}
-; AVX512F-32-NEXT: vpcmpneqb %zmm6, %zmm5, %k4 {%k1}
-; AVX512F-32-NEXT: vpcmpleb %zmm5, %zmm6, %k5 {%k1}
-; AVX512F-32-NEXT: vpcmpgtb %zmm6, %zmm5, %k1 {%k1}
+; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT: kmovq %k2, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT: kmovq %k3, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: kxorq %k0, %k0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: orl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: orl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT: kmovq %k4, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT: kmovq %k5, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: vpcmpleb %zmm0, %zmm1, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT: kmovq %k1, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl %esi, %eax
-; AVX512F-32-NEXT: adcl %ecx, %edx
-; AVX512F-32-NEXT: addl $60, %esp
+; AVX512F-32-NEXT: adcl %ebx, %edx
+; AVX512F-32-NEXT: addl $68, %esp
; AVX512F-32-NEXT: popl %esi
; AVX512F-32-NEXT: popl %ebx
; AVX512F-32-NEXT: vzeroupper
@@ -2679,753 +2500,574 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: pushl %esi
; AVX512F-32-NEXT: .cfi_def_cfa_offset 12
-; AVX512F-32-NEXT: subl $60, %esp
-; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
+; AVX512F-32-NEXT: subl $68, %esp
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 80
; AVX512F-32-NEXT: .cfi_offset %esi, -12
; AVX512F-32-NEXT: .cfi_offset %ebx, -8
-; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm6
-; AVX512F-32-NEXT: vmovdqa64 %zmm0, %zmm5
-; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrb $5, %al
-; AVX512F-32-NEXT: andb $1, %al
-; AVX512F-32-NEXT: movl %ecx, %ebx
-; AVX512F-32-NEXT: andb $15, %bl
-; AVX512F-32-NEXT: movl %ecx, %edx
-; AVX512F-32-NEXT: andb $2, %dl
-; AVX512F-32-NEXT: shrb %dl
-; AVX512F-32-NEXT: kmovd %edx, %k0
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; AVX512F-32-NEXT: movl %ebx, %eax
+; AVX512F-32-NEXT: shrl $16, %eax
; AVX512F-32-NEXT: movl %ebx, %edx
-; AVX512F-32-NEXT: shrb $2, %bl
-; AVX512F-32-NEXT: kmovd %ebx, %k1
-; AVX512F-32-NEXT: movl %ecx, %ebx
-; AVX512F-32-NEXT: shrb $4, %bl
-; AVX512F-32-NEXT: shrb $3, %dl
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2
-; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2
-; AVX512F-32-NEXT: kmovd %ecx, %k0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3
-; AVX512F-32-NEXT: vpbroadcastw %xmm3, %xmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2
-; AVX512F-32-NEXT: kmovd %edx, %k0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vpslld $24, %xmm3, %xmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2
-; AVX512F-32-NEXT: kmovd %ebx, %k0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vpbroadcastd %xmm3, %xmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2
-; AVX512F-32-NEXT: kmovd %eax, %k0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vpsllq $40, %xmm3, %xmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7]
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrb $6, %al
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrb $7, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movb %ch, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpbroadcastq %xmm2, %xmm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: andb $2, %al
-; AVX512F-32-NEXT: shrb %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6]
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movb %ch, %dl
; AVX512F-32-NEXT: andb $15, %dl
-; AVX512F-32-NEXT: movl %edx, %eax
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: andb $2, %cl
+; AVX512F-32-NEXT: shrb %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
+; AVX512F-32-NEXT: movl %edx, %ecx
; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: shrb $3, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4]
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $12, %eax
-; AVX512F-32-NEXT: andl $15, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $13, %eax
-; AVX512F-32-NEXT: andb $1, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2]
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $14, %eax
-; AVX512F-32-NEXT: andl $3, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $15, %eax
-; AVX512F-32-NEXT: andl $1, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $16, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
+; AVX512F-32-NEXT: kmovd %edx, %k2
+; AVX512F-32-NEXT: movb %bh, %dl
+; AVX512F-32-NEXT: andb $15, %dl
+; AVX512F-32-NEXT: shrb $3, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k0
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: shrb $4, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k3
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: shrb $5, %cl
+; AVX512F-32-NEXT: andb $1, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k4
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: shrb $6, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k6
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: shrb $7, %cl
+; AVX512F-32-NEXT: kmovd %ebx, %k5
+; AVX512F-32-NEXT: kshiftrq $1, %k5, %k7
+; AVX512F-32-NEXT: kxorq %k1, %k7, %k1
+; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $62, %k1, %k1
+; AVX512F-32-NEXT: kxorq %k5, %k1, %k7
+; AVX512F-32-NEXT: kshiftrq $2, %k7, %k1
+; AVX512F-32-NEXT: kxorq %k2, %k1, %k2
+; AVX512F-32-NEXT: kmovd %ecx, %k5
+; AVX512F-32-NEXT: movb %bh, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
+; AVX512F-32-NEXT: movl %ebx, %esi
+; AVX512F-32-NEXT: andb $2, %cl
+; AVX512F-32-NEXT: shrb %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2
+; AVX512F-32-NEXT: kshiftrq $61, %k2, %k2
+; AVX512F-32-NEXT: kxorq %k7, %k2, %k7
+; AVX512F-32-NEXT: kshiftrq $3, %k7, %k2
+; AVX512F-32-NEXT: kxorq %k0, %k2, %k0
+; AVX512F-32-NEXT: kmovd %ecx, %k2
+; AVX512F-32-NEXT: movl %edx, %ecx
+; AVX512F-32-NEXT: shrb $2, %dl
+; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0
+; AVX512F-32-NEXT: kshiftrq $60, %k0, %k0
+; AVX512F-32-NEXT: kxorq %k7, %k0, %k0
+; AVX512F-32-NEXT: kshiftrq $4, %k0, %k7
+; AVX512F-32-NEXT: kxorq %k3, %k7, %k7
+; AVX512F-32-NEXT: kmovd %edx, %k3
; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $2, %dl
-; AVX512F-32-NEXT: shrb %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
+; AVX512F-32-NEXT: andb $15, %dl
+; AVX512F-32-NEXT: shrb $3, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $59, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k0, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $5, %k7, %k0
+; AVX512F-32-NEXT: kxorq %k4, %k0, %k4
+; AVX512F-32-NEXT: kmovd %ecx, %k0
+; AVX512F-32-NEXT: movl %esi, %ecx
+; AVX512F-32-NEXT: shrl $13, %ecx
+; AVX512F-32-NEXT: andb $1, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
+; AVX512F-32-NEXT: kshiftrq $58, %k4, %k4
+; AVX512F-32-NEXT: kxorq %k7, %k4, %k7
+; AVX512F-32-NEXT: kshiftrq $6, %k7, %k4
+; AVX512F-32-NEXT: kxorq %k6, %k4, %k6
+; AVX512F-32-NEXT: kmovd %ecx, %k4
; AVX512F-32-NEXT: movl %eax, %ebx
-; AVX512F-32-NEXT: andb $15, %bl
-; AVX512F-32-NEXT: movl %ebx, %edx
-; AVX512F-32-NEXT: shrb $2, %bl
-; AVX512F-32-NEXT: kmovd %ebx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: shrb $3, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpslld $24, %xmm2, %xmm2
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: shrb $4, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: shrb $5, %dl
-; AVX512F-32-NEXT: andb $1, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpsllq $40, %xmm2, %xmm2
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
+; AVX512F-32-NEXT: andb $2, %bl
+; AVX512F-32-NEXT: shrb %bl
+; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6
+; AVX512F-32-NEXT: kshiftrq $57, %k6, %k6
+; AVX512F-32-NEXT: kxorq %k7, %k6, %k6
+; AVX512F-32-NEXT: kshiftrq $7, %k6, %k7
+; AVX512F-32-NEXT: kxorq %k5, %k7, %k7
+; AVX512F-32-NEXT: kmovd %ebx, %k5
+; AVX512F-32-NEXT: movl %edx, %ecx
+; AVX512F-32-NEXT: shrb $2, %dl
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $56, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k6, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $8, %k7, %k6
+; AVX512F-32-NEXT: kxorq %k1, %k6, %k1
+; AVX512F-32-NEXT: kmovd %edx, %k6
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; AVX512F-32-NEXT: shrb $3, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $55, %k1, %k1
+; AVX512F-32-NEXT: kxorq %k7, %k1, %k7
+; AVX512F-32-NEXT: kshiftrq $9, %k7, %k1
+; AVX512F-32-NEXT: kxorq %k2, %k1, %k2
+; AVX512F-32-NEXT: kmovd %ecx, %k1
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: shrb $4, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2
+; AVX512F-32-NEXT: kshiftrq $54, %k2, %k2
+; AVX512F-32-NEXT: kxorq %k7, %k2, %k7
+; AVX512F-32-NEXT: kshiftrq $10, %k7, %k2
+; AVX512F-32-NEXT: kxorq %k3, %k2, %k3
+; AVX512F-32-NEXT: kmovd %ecx, %k2
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: shrb $5, %cl
+; AVX512F-32-NEXT: andb $1, %cl
+; AVX512F-32-NEXT: movl %esi, %edx
+; AVX512F-32-NEXT: shrl $12, %edx
+; AVX512F-32-NEXT: andl $15, %edx
+; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3
+; AVX512F-32-NEXT: kshiftrq $53, %k3, %k3
+; AVX512F-32-NEXT: kxorq %k7, %k3, %k3
+; AVX512F-32-NEXT: kshiftrq $11, %k3, %k7
+; AVX512F-32-NEXT: kxorq %k0, %k7, %k0
+; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0
+; AVX512F-32-NEXT: kshiftrq $52, %k0, %k0
+; AVX512F-32-NEXT: kxorq %k3, %k0, %k3
+; AVX512F-32-NEXT: kshiftrq $12, %k3, %k0
+; AVX512F-32-NEXT: kmovd %edx, %k7
+; AVX512F-32-NEXT: kxorq %k7, %k0, %k7
+; AVX512F-32-NEXT: kmovd %ecx, %k0
; AVX512F-32-NEXT: movl %eax, %edx
; AVX512F-32-NEXT: shrb $6, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
+; AVX512F-32-NEXT: movl %esi, %ecx
+; AVX512F-32-NEXT: shrl $14, %ecx
+; AVX512F-32-NEXT: andl $3, %ecx
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $51, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k3, %k7, %k3
+; AVX512F-32-NEXT: kshiftrq $13, %k3, %k7
+; AVX512F-32-NEXT: kxorq %k4, %k7, %k4
+; AVX512F-32-NEXT: kmovd %ecx, %k7
+; AVX512F-32-NEXT: movl %esi, %ecx
+; AVX512F-32-NEXT: shrl $15, %ecx
+; AVX512F-32-NEXT: andl $1, %ecx
+; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
+; AVX512F-32-NEXT: kshiftrq $50, %k4, %k4
+; AVX512F-32-NEXT: kxorq %k3, %k4, %k3
+; AVX512F-32-NEXT: kshiftrq $14, %k3, %k4
+; AVX512F-32-NEXT: kxorq %k7, %k4, %k4
+; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
+; AVX512F-32-NEXT: kshiftrq $49, %k4, %k4
+; AVX512F-32-NEXT: kxorq %k3, %k4, %k3
+; AVX512F-32-NEXT: kshiftrq $15, %k3, %k4
+; AVX512F-32-NEXT: kmovd %ecx, %k7
+; AVX512F-32-NEXT: kxorq %k7, %k4, %k4
+; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
+; AVX512F-32-NEXT: kshiftrq $48, %k4, %k4
+; AVX512F-32-NEXT: kxorq %k3, %k4, %k3
+; AVX512F-32-NEXT: kshiftrq $16, %k3, %k4
+; AVX512F-32-NEXT: kmovd %eax, %k7
+; AVX512F-32-NEXT: kxorq %k7, %k4, %k4
+; AVX512F-32-NEXT: kmovd %edx, %k7
+; AVX512F-32-NEXT: movl %esi, %edx
+; AVX512F-32-NEXT: shrl $24, %edx
; AVX512F-32-NEXT: # kill: def %al killed %al killed %eax def %eax
; AVX512F-32-NEXT: shrb $7, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $24, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpbroadcastq %xmm2, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $2, %dl
-; AVX512F-32-NEXT: shrb %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6]
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $15, %dl
+; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
+; AVX512F-32-NEXT: kshiftrq $47, %k4, %k4
+; AVX512F-32-NEXT: kxorq %k3, %k4, %k3
+; AVX512F-32-NEXT: kshiftrq $17, %k3, %k4
+; AVX512F-32-NEXT: kxorq %k5, %k4, %k4
+; AVX512F-32-NEXT: kmovd %eax, %k5
; AVX512F-32-NEXT: movl %edx, %eax
-; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: shrb $3, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4]
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm3, %ymm4, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm4[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $28, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm4, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm4[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: movl %ecx, %esi
-; AVX512F-32-NEXT: shrl $29, %eax
-; AVX512F-32-NEXT: andb $1, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2]
-; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm2
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm2[0,1,2,3],zmm0[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %esi, %eax
-; AVX512F-32-NEXT: shrl $30, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0
-; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
-; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %esi, %eax
-; AVX512F-32-NEXT: shrl $31, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
-; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm1, %ymm0, %ymm0
-; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; AVX512F-32-NEXT: kmovd %ecx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm7
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm1, %ymm7, %ymm1
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andb $2, %al
-; AVX512F-32-NEXT: shrb %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpsllw $8, %xmm0, %xmm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %edx
+; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
+; AVX512F-32-NEXT: kshiftrq $46, %k4, %k4
+; AVX512F-32-NEXT: kxorq %k3, %k4, %k4
+; AVX512F-32-NEXT: kshiftrq $18, %k4, %k3
+; AVX512F-32-NEXT: kxorq %k6, %k3, %k6
+; AVX512F-32-NEXT: kmovd %edx, %k3
+; AVX512F-32-NEXT: # kill: def %dl killed %dl killed %edx def %edx
; AVX512F-32-NEXT: andb $15, %dl
-; AVX512F-32-NEXT: movl %edx, %eax
-; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: shrb $3, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpslld $24, %xmm0, %xmm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrb $4, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpbroadcastd %xmm0, %xmm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrb $5, %al
-; AVX512F-32-NEXT: andb $1, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpsllq $40, %xmm0, %xmm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrb $6, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrb $7, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpsllq $56, %xmm0, %xmm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movb %ch, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpbroadcastq %xmm0, %xmm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
; AVX512F-32-NEXT: andb $2, %al
; AVX512F-32-NEXT: shrb %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movb %ch, %dl
-; AVX512F-32-NEXT: andb $15, %dl
-; AVX512F-32-NEXT: movl %edx, %eax
+; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6
+; AVX512F-32-NEXT: kshiftrq $45, %k6, %k6
+; AVX512F-32-NEXT: kxorq %k4, %k6, %k6
+; AVX512F-32-NEXT: kshiftrq $19, %k6, %k4
+; AVX512F-32-NEXT: kxorq %k1, %k4, %k1
+; AVX512F-32-NEXT: kmovd %eax, %k4
+; AVX512F-32-NEXT: movl %edx, %ecx
; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: shrb $3, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4]
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $12, %eax
-; AVX512F-32-NEXT: andl $15, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpbroadcastd %xmm0, %xmm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $13, %eax
-; AVX512F-32-NEXT: andb $1, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $14, %eax
-; AVX512F-32-NEXT: andl $3, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $15, %eax
-; AVX512F-32-NEXT: andl $1, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %ebx
-; AVX512F-32-NEXT: shrl $16, %ebx
-; AVX512F-32-NEXT: kmovd %ebx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
+; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $44, %k1, %k1
+; AVX512F-32-NEXT: kxorq %k6, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $20, %k1, %k6
+; AVX512F-32-NEXT: kxorq %k2, %k6, %k6
+; AVX512F-32-NEXT: kmovd %edx, %k2
+; AVX512F-32-NEXT: movl %ebx, %eax
+; AVX512F-32-NEXT: andb $15, %al
+; AVX512F-32-NEXT: shrb $3, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6
+; AVX512F-32-NEXT: kshiftrq $43, %k6, %k6
+; AVX512F-32-NEXT: kxorq %k1, %k6, %k1
+; AVX512F-32-NEXT: kshiftrq $21, %k1, %k6
+; AVX512F-32-NEXT: kxorq %k0, %k6, %k6
+; AVX512F-32-NEXT: kmovd %ecx, %k0
+; AVX512F-32-NEXT: movl %esi, %ecx
+; AVX512F-32-NEXT: shrl $29, %ecx
+; AVX512F-32-NEXT: andb $1, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6
+; AVX512F-32-NEXT: kshiftrq $42, %k6, %k6
+; AVX512F-32-NEXT: kxorq %k1, %k6, %k6
+; AVX512F-32-NEXT: kshiftrq $22, %k6, %k1
+; AVX512F-32-NEXT: kxorq %k7, %k1, %k7
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: movl %ebx, %edx
; AVX512F-32-NEXT: andb $2, %dl
; AVX512F-32-NEXT: shrb %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpsllw $8, %xmm0, %xmm0
-; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: movl %ebx, %eax
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $41, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
+; AVX512F-32-NEXT: kshiftrq $23, %k6, %k7
+; AVX512F-32-NEXT: kxorq %k5, %k7, %k7
+; AVX512F-32-NEXT: kmovd %edx, %k5
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: shrb $2, %al
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $40, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k6, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $24, %k7, %k6
+; AVX512F-32-NEXT: kxorq %k3, %k6, %k3
+; AVX512F-32-NEXT: kmovd %eax, %k6
+; AVX512F-32-NEXT: movb %bh, %al
; AVX512F-32-NEXT: andb $15, %al
-; AVX512F-32-NEXT: movl %eax, %edx
+; AVX512F-32-NEXT: shrb $3, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3
+; AVX512F-32-NEXT: kshiftrq $39, %k3, %k3
+; AVX512F-32-NEXT: kxorq %k7, %k3, %k7
+; AVX512F-32-NEXT: kshiftrq $25, %k7, %k3
+; AVX512F-32-NEXT: kxorq %k4, %k3, %k4
+; AVX512F-32-NEXT: kmovd %ecx, %k3
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: shrb $4, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
+; AVX512F-32-NEXT: kshiftrq $38, %k4, %k4
+; AVX512F-32-NEXT: kxorq %k7, %k4, %k7
+; AVX512F-32-NEXT: kshiftrq $26, %k7, %k4
+; AVX512F-32-NEXT: kxorq %k2, %k4, %k2
+; AVX512F-32-NEXT: kmovd %ecx, %k4
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: shrb $5, %cl
+; AVX512F-32-NEXT: andb $1, %cl
+; AVX512F-32-NEXT: movl %esi, %edx
+; AVX512F-32-NEXT: shrl $28, %edx
+; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2
+; AVX512F-32-NEXT: kshiftrq $37, %k2, %k2
+; AVX512F-32-NEXT: kxorq %k7, %k2, %k2
+; AVX512F-32-NEXT: kshiftrq $27, %k2, %k7
+; AVX512F-32-NEXT: kxorq %k0, %k7, %k0
+; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0
+; AVX512F-32-NEXT: kshiftrq $36, %k0, %k0
+; AVX512F-32-NEXT: kxorq %k2, %k0, %k2
+; AVX512F-32-NEXT: kshiftrq $28, %k2, %k0
+; AVX512F-32-NEXT: kmovd %edx, %k7
+; AVX512F-32-NEXT: kxorq %k7, %k0, %k7
+; AVX512F-32-NEXT: kmovd %ecx, %k0
+; AVX512F-32-NEXT: movl %ebx, %edx
+; AVX512F-32-NEXT: shrb $6, %dl
+; AVX512F-32-NEXT: movl %esi, %ecx
+; AVX512F-32-NEXT: shrl $30, %ecx
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $35, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k2, %k7, %k2
+; AVX512F-32-NEXT: kshiftrq $29, %k2, %k7
+; AVX512F-32-NEXT: kxorq %k1, %k7, %k1
+; AVX512F-32-NEXT: kmovd %ecx, %k7
+; AVX512F-32-NEXT: movl %esi, %ecx
+; AVX512F-32-NEXT: shrl $31, %ecx
+; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $34, %k1, %k1
+; AVX512F-32-NEXT: kxorq %k2, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $30, %k1, %k2
+; AVX512F-32-NEXT: kxorq %k7, %k2, %k2
+; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2
+; AVX512F-32-NEXT: kshiftrq $33, %k2, %k2
+; AVX512F-32-NEXT: kxorq %k1, %k2, %k1
+; AVX512F-32-NEXT: kshiftrq $31, %k1, %k2
+; AVX512F-32-NEXT: kmovd %ecx, %k7
+; AVX512F-32-NEXT: kxorq %k7, %k2, %k2
+; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2
+; AVX512F-32-NEXT: kshiftrq $32, %k2, %k2
+; AVX512F-32-NEXT: kxorq %k1, %k2, %k1
+; AVX512F-32-NEXT: kshiftrq $32, %k1, %k2
+; AVX512F-32-NEXT: kmovd %ebx, %k7
+; AVX512F-32-NEXT: kxorq %k7, %k2, %k2
+; AVX512F-32-NEXT: kmovd %edx, %k7
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: shrb $7, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2
+; AVX512F-32-NEXT: kshiftrq $31, %k2, %k2
+; AVX512F-32-NEXT: kxorq %k1, %k2, %k1
+; AVX512F-32-NEXT: kshiftrq $33, %k1, %k2
+; AVX512F-32-NEXT: kxorq %k5, %k2, %k2
+; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2
+; AVX512F-32-NEXT: kshiftrq $30, %k2, %k2
+; AVX512F-32-NEXT: kxorq %k1, %k2, %k2
+; AVX512F-32-NEXT: kshiftrq $34, %k2, %k1
+; AVX512F-32-NEXT: kxorq %k6, %k1, %k5
+; AVX512F-32-NEXT: kmovd %ecx, %k6
+; AVX512F-32-NEXT: movb %bh, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
+; AVX512F-32-NEXT: andb $2, %cl
+; AVX512F-32-NEXT: shrb %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k5, %k5
+; AVX512F-32-NEXT: kshiftrq $29, %k5, %k5
+; AVX512F-32-NEXT: kxorq %k2, %k5, %k5
+; AVX512F-32-NEXT: kshiftrq $35, %k5, %k2
+; AVX512F-32-NEXT: kxorq %k3, %k2, %k3
+; AVX512F-32-NEXT: kmovd %ecx, %k2
+; AVX512F-32-NEXT: movl %eax, %ecx
; AVX512F-32-NEXT: shrb $2, %al
-; AVX512F-32-NEXT: kmovd %eax, %k0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0
-; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0
-; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: shrb $3, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpslld $24, %xmm0, %xmm0
-; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ebx, %eax
-; AVX512F-32-NEXT: shrb $4, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpbroadcastd %xmm0, %xmm0
-; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
+; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3
+; AVX512F-32-NEXT: kshiftrq $28, %k3, %k3
+; AVX512F-32-NEXT: kxorq %k5, %k3, %k5
+; AVX512F-32-NEXT: kshiftrq $36, %k5, %k3
+; AVX512F-32-NEXT: kxorq %k4, %k3, %k4
+; AVX512F-32-NEXT: kmovd %eax, %k3
; AVX512F-32-NEXT: movl %ebx, %eax
-; AVX512F-32-NEXT: shrb $5, %al
-; AVX512F-32-NEXT: andb $1, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpsllq $40, %xmm0, %xmm0
-; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ebx, %eax
-; AVX512F-32-NEXT: shrb $6, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0
-; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: # kill: def %bl killed %bl killed %ebx def %ebx
-; AVX512F-32-NEXT: shrb $7, %bl
-; AVX512F-32-NEXT: kmovd %ebx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpsllq $56, %xmm0, %xmm0
-; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $24, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpbroadcastq %xmm0, %ymm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
+; AVX512F-32-NEXT: shrl $16, %eax
+; AVX512F-32-NEXT: shrb $3, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
+; AVX512F-32-NEXT: kshiftrq $27, %k4, %k4
+; AVX512F-32-NEXT: kxorq %k5, %k4, %k5
+; AVX512F-32-NEXT: kshiftrq $37, %k5, %k4
+; AVX512F-32-NEXT: kxorq %k0, %k4, %k0
+; AVX512F-32-NEXT: kmovd %ecx, %k4
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: shrl $13, %ecx
+; AVX512F-32-NEXT: andb $1, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0
+; AVX512F-32-NEXT: kshiftrq $26, %k0, %k0
+; AVX512F-32-NEXT: kxorq %k5, %k0, %k0
+; AVX512F-32-NEXT: kshiftrq $38, %k0, %k5
+; AVX512F-32-NEXT: kxorq %k7, %k5, %k7
+; AVX512F-32-NEXT: kmovd %ecx, %k5
; AVX512F-32-NEXT: movl %eax, %edx
; AVX512F-32-NEXT: andb $2, %dl
; AVX512F-32-NEXT: shrb %dl
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $25, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k0, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $39, %k7, %k0
+; AVX512F-32-NEXT: kxorq %k6, %k0, %k6
+; AVX512F-32-NEXT: kmovd %edx, %k0
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) # 8-byte Spill
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: andb $15, %cl
+; AVX512F-32-NEXT: movl %ecx, %edx
+; AVX512F-32-NEXT: shrb $2, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6
+; AVX512F-32-NEXT: kshiftrq $24, %k6, %k6
+; AVX512F-32-NEXT: kxorq %k7, %k6, %k6
+; AVX512F-32-NEXT: kshiftrq $40, %k6, %k7
+; AVX512F-32-NEXT: kxorq %k1, %k7, %k7
+; AVX512F-32-NEXT: kmovd %ecx, %k1
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: shrl $12, %ecx
+; AVX512F-32-NEXT: andl $15, %ecx
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $23, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
+; AVX512F-32-NEXT: kshiftrq $41, %k6, %k7
+; AVX512F-32-NEXT: kxorq %k2, %k7, %k2
+; AVX512F-32-NEXT: kmovd %ecx, %k0
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: shrl $14, %ecx
+; AVX512F-32-NEXT: andl $3, %ecx
+; AVX512F-32-NEXT: kshiftlq $63, %k2, %k2
+; AVX512F-32-NEXT: kshiftrq $22, %k2, %k2
+; AVX512F-32-NEXT: kxorq %k6, %k2, %k2
+; AVX512F-32-NEXT: kshiftrq $42, %k2, %k6
+; AVX512F-32-NEXT: kxorq %k3, %k6, %k3
+; AVX512F-32-NEXT: kmovd %ecx, %k7
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: shrl $15, %ecx
+; AVX512F-32-NEXT: andl $1, %ecx
+; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3
+; AVX512F-32-NEXT: kshiftrq $21, %k3, %k3
+; AVX512F-32-NEXT: kxorq %k2, %k3, %k2
+; AVX512F-32-NEXT: kshiftrq $43, %k2, %k3
+; AVX512F-32-NEXT: kxorq %k4, %k3, %k3
+; AVX512F-32-NEXT: kmovd %ecx, %k6
+; AVX512F-32-NEXT: shrb $3, %dl
+; AVX512F-32-NEXT: kshiftlq $63, %k3, %k3
+; AVX512F-32-NEXT: kshiftrq $20, %k3, %k3
+; AVX512F-32-NEXT: kxorq %k2, %k3, %k3
+; AVX512F-32-NEXT: kshiftrq $44, %k3, %k2
+; AVX512F-32-NEXT: kxorq %k0, %k2, %k0
+; AVX512F-32-NEXT: kmovd %edx, %k2
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: shrb $4, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0
+; AVX512F-32-NEXT: kshiftrq $19, %k0, %k0
+; AVX512F-32-NEXT: kxorq %k3, %k0, %k0
+; AVX512F-32-NEXT: kshiftrq $45, %k0, %k3
+; AVX512F-32-NEXT: kxorq %k5, %k3, %k4
+; AVX512F-32-NEXT: kmovd %ecx, %k3
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: shrb $5, %cl
+; AVX512F-32-NEXT: andb $1, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k4, %k4
+; AVX512F-32-NEXT: kshiftrq $18, %k4, %k4
+; AVX512F-32-NEXT: kxorq %k0, %k4, %k0
+; AVX512F-32-NEXT: kshiftrq $46, %k0, %k4
+; AVX512F-32-NEXT: kxorq %k7, %k4, %k5
+; AVX512F-32-NEXT: kmovd %ecx, %k4
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: shrb $6, %cl
+; AVX512F-32-NEXT: kshiftlq $63, %k5, %k5
+; AVX512F-32-NEXT: kshiftrq $17, %k5, %k5
+; AVX512F-32-NEXT: kxorq %k0, %k5, %k0
+; AVX512F-32-NEXT: kshiftrq $47, %k0, %k5
+; AVX512F-32-NEXT: kxorq %k6, %k5, %k5
+; AVX512F-32-NEXT: kshiftlq $63, %k5, %k5
+; AVX512F-32-NEXT: kshiftrq $16, %k5, %k5
+; AVX512F-32-NEXT: kxorq %k0, %k5, %k0
+; AVX512F-32-NEXT: kshiftrq $48, %k0, %k5
+; AVX512F-32-NEXT: kmovd %eax, %k6
+; AVX512F-32-NEXT: kxorq %k6, %k5, %k6
+; AVX512F-32-NEXT: kmovd %ecx, %k5
+; AVX512F-32-NEXT: movl %ebx, %edx
+; AVX512F-32-NEXT: shrl $24, %edx
+; AVX512F-32-NEXT: # kill: def %al killed %al killed %eax def %eax
+; AVX512F-32-NEXT: shrb $7, %al
+; AVX512F-32-NEXT: kshiftlq $63, %k6, %k6
+; AVX512F-32-NEXT: kshiftrq $15, %k6, %k6
+; AVX512F-32-NEXT: kxorq %k0, %k6, %k6
+; AVX512F-32-NEXT: kshiftrq $49, %k6, %k0
+; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k7 # 8-byte Reload
+; AVX512F-32-NEXT: kxorq %k7, %k0, %k7
+; AVX512F-32-NEXT: kmovd %eax, %k0
+; AVX512F-32-NEXT: movl %edx, %eax
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $14, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
+; AVX512F-32-NEXT: kshiftrq $50, %k6, %k7
+; AVX512F-32-NEXT: kxorq %k1, %k7, %k7
; AVX512F-32-NEXT: kmovd %edx, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
-; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: movl %eax, %edx
+; AVX512F-32-NEXT: # kill: def %dl killed %dl killed %edx def %edx
; AVX512F-32-NEXT: andb $15, %dl
+; AVX512F-32-NEXT: andb $2, %al
+; AVX512F-32-NEXT: shrb %al
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $13, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
+; AVX512F-32-NEXT: kshiftrq $51, %k6, %k7
+; AVX512F-32-NEXT: kxorq %k2, %k7, %k7
+; AVX512F-32-NEXT: kmovd %eax, %k2
; AVX512F-32-NEXT: movl %edx, %eax
; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0
-; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0
-; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $12, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
+; AVX512F-32-NEXT: kshiftrq $52, %k6, %k7
+; AVX512F-32-NEXT: kxorq %k3, %k7, %k7
+; AVX512F-32-NEXT: kmovd %edx, %k3
; AVX512F-32-NEXT: shrb $3, %al
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0
-; AVX512F-32-NEXT: kmovd %eax, %k0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4]
-; AVX512F-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm4
-; AVX512F-32-NEXT: vpblendvb %ymm3, %ymm4, %ymm1, %ymm1
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $11, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
+; AVX512F-32-NEXT: kshiftrq $53, %k6, %k7
+; AVX512F-32-NEXT: kxorq %k4, %k7, %k7
+; AVX512F-32-NEXT: kmovd %eax, %k4
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $29, %eax
; AVX512F-32-NEXT: andb $1, %al
-; AVX512F-32-NEXT: kmovd %eax, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $10, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
+; AVX512F-32-NEXT: kshiftrq $54, %k6, %k7
+; AVX512F-32-NEXT: kxorq %k5, %k7, %k7
+; AVX512F-32-NEXT: kmovd %eax, %k5
+; AVX512F-32-NEXT: kshiftlq $63, %k7, %k7
+; AVX512F-32-NEXT: kshiftrq $9, %k7, %k7
+; AVX512F-32-NEXT: kxorq %k6, %k7, %k6
+; AVX512F-32-NEXT: kshiftrq $55, %k6, %k7
+; AVX512F-32-NEXT: kxorq %k0, %k7, %k0
+; AVX512F-32-NEXT: kshiftlq $63, %k0, %k0
+; AVX512F-32-NEXT: kshiftrq $8, %k0, %k0
+; AVX512F-32-NEXT: kxorq %k6, %k0, %k0
+; AVX512F-32-NEXT: kshiftrq $56, %k0, %k6
+; AVX512F-32-NEXT: kxorq %k1, %k6, %k1
+; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $7, %k1, %k1
+; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
+; AVX512F-32-NEXT: kshiftrq $57, %k0, %k1
+; AVX512F-32-NEXT: kxorq %k2, %k1, %k1
+; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $6, %k1, %k1
+; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
+; AVX512F-32-NEXT: kshiftrq $58, %k0, %k1
+; AVX512F-32-NEXT: kxorq %k3, %k1, %k1
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $28, %eax
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3
-; AVX512F-32-NEXT: vpbroadcastd %xmm3, %xmm3
-; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm1, %ymm3, %ymm1
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1
-; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2]
-; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
-; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255]
-; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm1, %ymm3, %ymm1
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $30, %eax
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0
-; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512F-32-NEXT: kmovd %eax, %k0
-; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
-; AVX512F-32-NEXT: vpbroadcastw %xmm3, %xmm3
-; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
-; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm1, %ymm3, %ymm1
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $5, %k1, %k1
+; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
+; AVX512F-32-NEXT: kshiftrq $59, %k0, %k1
+; AVX512F-32-NEXT: kxorq %k4, %k1, %k1
+; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $4, %k1, %k1
+; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
+; AVX512F-32-NEXT: kshiftrq $60, %k0, %k1
+; AVX512F-32-NEXT: kmovd %eax, %k2
+; AVX512F-32-NEXT: kxorq %k2, %k1, %k1
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $31, %eax
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: shrl $30, %ecx
+; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $3, %k1, %k1
+; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
+; AVX512F-32-NEXT: kshiftrq $61, %k0, %k1
+; AVX512F-32-NEXT: kxorq %k5, %k1, %k1
+; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $2, %k1, %k1
+; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
+; AVX512F-32-NEXT: kshiftrq $62, %k0, %k1
+; AVX512F-32-NEXT: kmovd %ecx, %k2
+; AVX512F-32-NEXT: kxorq %k2, %k1, %k1
+; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
+; AVX512F-32-NEXT: kshiftrq $1, %k1, %k1
+; AVX512F-32-NEXT: kxorq %k0, %k1, %k0
; AVX512F-32-NEXT: kshiftlq $1, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $1, %k0, %k0
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1
; AVX512F-32-NEXT: korq %k1, %k0, %k1
-; AVX512F-32-NEXT: vpcmpeqb %zmm6, %zmm5, %k0 {%k1}
-; AVX512F-32-NEXT: vpcmpltub %zmm6, %zmm5, %k2 {%k1}
-; AVX512F-32-NEXT: vpcmpleub %zmm6, %zmm5, %k3 {%k1}
-; AVX512F-32-NEXT: vpcmpneqb %zmm6, %zmm5, %k4 {%k1}
-; AVX512F-32-NEXT: vpcmpnltub %zmm6, %zmm5, %k5 {%k1}
-; AVX512F-32-NEXT: vpcmpnleub %zmm6, %zmm5, %k1 {%k1}
+; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT: kmovq %k2, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT: kmovq %k3, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: kxorq %k0, %k0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: orl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: orl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT: kmovq %k4, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT: kmovq %k5, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT: kmovq %k1, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl %esi, %eax
-; AVX512F-32-NEXT: adcl %ecx, %edx
-; AVX512F-32-NEXT: addl $60, %esp
+; AVX512F-32-NEXT: adcl %ebx, %edx
+; AVX512F-32-NEXT: addl $68, %esp
; AVX512F-32-NEXT: popl %esi
; AVX512F-32-NEXT: popl %ebx
; AVX512F-32-NEXT: vzeroupper
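(For readers skimming the new AVX512F-32 checks above: each repeated kshiftlq $63 / kshiftrq $N / kxorq group is the mask-register form of replacing a single bit in a 64-bit mask. A minimal C sketch of the same bit manipulation, with invented names and no claim to match the exact instruction sequence, would be:

    #include <stdint.h>

    /* Illustrative only: replace bit `idx` of a 64-bit mask with `bit`.
       The xor of old and new values is nonzero only when the bit changes,
       so xoring it back in cancels the old value and leaves the new one. */
    static inline uint64_t insert_mask_bit(uint64_t mask, unsigned idx, uint64_t bit) {
      uint64_t old_bit = (mask >> idx) & 1;            /* current value at idx */
      uint64_t flip    = (old_bit ^ (bit & 1)) << idx; /* 1 << idx iff it changes */
      return mask ^ flip;                              /* apply the change */
    }

In the generated code the extract/position steps are done with kshift pairs instead of scalar shifts, which is why each inserted bit costs a few kshiftlq/kshiftrq/kxorq instructions in the checks.)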
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll?rev=320120&r1=320119&r2=320120&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll Thu Dec 7 16:16:09 2017
@@ -4207,39 +4207,35 @@ define zeroext i8 @test_vpcmpeqd_v4i1_v8
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -4262,39 +4258,35 @@ define zeroext i8 @test_vpcmpeqd_v4i1_v8
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -4337,39 +4329,35 @@ define zeroext i8 @test_masked_vpcmpeqd_
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -4414,39 +4402,35 @@ define zeroext i8 @test_masked_vpcmpeqd_
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -4475,39 +4459,35 @@ define zeroext i8 @test_vpcmpeqd_v4i1_v8
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -4552,39 +4532,35 @@ define zeroext i8 @test_masked_vpcmpeqd_
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -4613,39 +4589,35 @@ define zeroext i16 @test_vpcmpeqd_v4i1_v
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -4668,39 +4640,35 @@ define zeroext i16 @test_vpcmpeqd_v4i1_v
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -4743,39 +4711,35 @@ define zeroext i16 @test_masked_vpcmpeqd
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -4820,39 +4784,35 @@ define zeroext i16 @test_masked_vpcmpeqd
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -4881,39 +4841,35 @@ define zeroext i16 @test_vpcmpeqd_v4i1_v
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -4958,39 +4914,35 @@ define zeroext i16 @test_masked_vpcmpeqd
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -8521,23 +8473,21 @@ define zeroext i8 @test_vpcmpeqq_v2i1_v8
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8560,23 +8510,21 @@ define zeroext i8 @test_vpcmpeqq_v2i1_v8
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8611,23 +8559,21 @@ define zeroext i8 @test_masked_vpcmpeqq_
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8664,23 +8610,21 @@ define zeroext i8 @test_masked_vpcmpeqq_
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8709,23 +8653,21 @@ define zeroext i8 @test_vpcmpeqq_v2i1_v8
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8762,23 +8704,21 @@ define zeroext i8 @test_masked_vpcmpeqq_
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8807,23 +8747,21 @@ define zeroext i16 @test_vpcmpeqq_v2i1_v
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8846,23 +8784,21 @@ define zeroext i16 @test_vpcmpeqq_v2i1_v
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8897,23 +8833,21 @@ define zeroext i16 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8950,23 +8884,21 @@ define zeroext i16 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8995,23 +8927,21 @@ define zeroext i16 @test_vpcmpeqq_v2i1_v
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -9048,23 +8978,21 @@ define zeroext i16 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -9727,36 +9655,33 @@ define zeroext i8 @test_vpcmpeqq_v4i1_v8
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -9784,36 +9709,33 @@ define zeroext i8 @test_vpcmpeqq_v4i1_v8
; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -9861,36 +9783,33 @@ define zeroext i8 @test_masked_vpcmpeqq_
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -9940,36 +9859,33 @@ define zeroext i8 @test_masked_vpcmpeqq_
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -10003,36 +9919,33 @@ define zeroext i8 @test_vpcmpeqq_v4i1_v8
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -10082,36 +9995,33 @@ define zeroext i8 @test_masked_vpcmpeqq_
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -10145,36 +10055,33 @@ define zeroext i16 @test_vpcmpeqq_v4i1_v
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -10202,36 +10109,33 @@ define zeroext i16 @test_vpcmpeqq_v4i1_v
; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -10279,36 +10183,33 @@ define zeroext i16 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -10358,36 +10259,33 @@ define zeroext i16 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -10421,36 +10319,33 @@ define zeroext i16 @test_vpcmpeqq_v4i1_v
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -10500,36 +10395,33 @@ define zeroext i16 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -16550,39 +16442,35 @@ define zeroext i8 @test_vpcmpsgtd_v4i1_v
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -16605,39 +16493,35 @@ define zeroext i8 @test_vpcmpsgtd_v4i1_v
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -16680,39 +16564,35 @@ define zeroext i8 @test_masked_vpcmpsgtd
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -16757,39 +16637,35 @@ define zeroext i8 @test_masked_vpcmpsgtd
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -16818,39 +16694,35 @@ define zeroext i8 @test_vpcmpsgtd_v4i1_v
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -16895,39 +16767,35 @@ define zeroext i8 @test_masked_vpcmpsgtd
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -16956,39 +16824,35 @@ define zeroext i16 @test_vpcmpsgtd_v4i1_
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -17011,39 +16875,35 @@ define zeroext i16 @test_vpcmpsgtd_v4i1_
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -17086,39 +16946,35 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -17163,39 +17019,35 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -17224,39 +17076,35 @@ define zeroext i16 @test_vpcmpsgtd_v4i1_
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -17301,39 +17149,35 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -20864,23 +20708,21 @@ define zeroext i8 @test_vpcmpsgtq_v2i1_v
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -20903,23 +20745,21 @@ define zeroext i8 @test_vpcmpsgtq_v2i1_v
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -20954,23 +20794,21 @@ define zeroext i8 @test_masked_vpcmpsgtq
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21007,23 +20845,21 @@ define zeroext i8 @test_masked_vpcmpsgtq
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21052,23 +20888,21 @@ define zeroext i8 @test_vpcmpsgtq_v2i1_v
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21105,23 +20939,21 @@ define zeroext i8 @test_masked_vpcmpsgtq
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21150,23 +20982,21 @@ define zeroext i16 @test_vpcmpsgtq_v2i1_
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21189,23 +21019,21 @@ define zeroext i16 @test_vpcmpsgtq_v2i1_
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21240,23 +21068,21 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21293,23 +21119,21 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21338,23 +21162,21 @@ define zeroext i16 @test_vpcmpsgtq_v2i1_
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21391,23 +21213,21 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -22070,36 +21890,33 @@ define zeroext i8 @test_vpcmpsgtq_v4i1_v
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -22127,36 +21944,33 @@ define zeroext i8 @test_vpcmpsgtq_v4i1_v
; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -22204,36 +22018,33 @@ define zeroext i8 @test_masked_vpcmpsgtq
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -22283,36 +22094,33 @@ define zeroext i8 @test_masked_vpcmpsgtq
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -22346,36 +22154,33 @@ define zeroext i8 @test_vpcmpsgtq_v4i1_v
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -22425,36 +22230,33 @@ define zeroext i8 @test_masked_vpcmpsgtq
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -22488,36 +22290,33 @@ define zeroext i16 @test_vpcmpsgtq_v4i1_
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -22545,36 +22344,33 @@ define zeroext i16 @test_vpcmpsgtq_v4i1_
; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -22622,36 +22418,33 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -22701,36 +22494,33 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -22764,36 +22554,33 @@ define zeroext i16 @test_vpcmpsgtq_v4i1_
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -22843,36 +22630,33 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -28991,39 +28775,35 @@ define zeroext i8 @test_vpcmpsged_v4i1_v
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -29049,39 +28829,35 @@ define zeroext i8 @test_vpcmpsged_v4i1_v
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -29124,39 +28900,35 @@ define zeroext i8 @test_masked_vpcmpsged
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -29202,39 +28974,35 @@ define zeroext i8 @test_masked_vpcmpsged
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -29265,39 +29033,35 @@ define zeroext i8 @test_vpcmpsged_v4i1_v
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -29342,39 +29106,35 @@ define zeroext i8 @test_masked_vpcmpsged
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -29405,39 +29165,35 @@ define zeroext i16 @test_vpcmpsged_v4i1_
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -29463,39 +29219,35 @@ define zeroext i16 @test_vpcmpsged_v4i1_
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -29538,39 +29290,35 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -29616,39 +29364,35 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -29679,39 +29423,35 @@ define zeroext i16 @test_vpcmpsged_v4i1_
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -29756,39 +29496,35 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -33345,23 +33081,21 @@ define zeroext i8 @test_vpcmpsgeq_v2i1_v
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33387,23 +33121,21 @@ define zeroext i8 @test_vpcmpsgeq_v2i1_v
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33438,23 +33170,21 @@ define zeroext i8 @test_masked_vpcmpsgeq
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33492,23 +33222,21 @@ define zeroext i8 @test_masked_vpcmpsgeq
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33539,23 +33267,21 @@ define zeroext i8 @test_vpcmpsgeq_v2i1_v
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33592,23 +33318,21 @@ define zeroext i8 @test_masked_vpcmpsgeq
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33639,23 +33363,21 @@ define zeroext i16 @test_vpcmpsgeq_v2i1_
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33681,23 +33403,21 @@ define zeroext i16 @test_vpcmpsgeq_v2i1_
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33732,23 +33452,21 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33786,23 +33504,21 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33833,23 +33549,21 @@ define zeroext i16 @test_vpcmpsgeq_v2i1_
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33886,23 +33600,21 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -34583,36 +34295,33 @@ define zeroext i8 @test_vpcmpsgeq_v4i1_v
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -34643,36 +34352,33 @@ define zeroext i8 @test_vpcmpsgeq_v4i1_v
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -34722,36 +34428,33 @@ define zeroext i8 @test_masked_vpcmpsgeq
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -34804,36 +34507,33 @@ define zeroext i8 @test_masked_vpcmpsgeq
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -34869,36 +34569,33 @@ define zeroext i8 @test_vpcmpsgeq_v4i1_v
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -34950,36 +34647,33 @@ define zeroext i8 @test_masked_vpcmpsgeq
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -35015,36 +34709,33 @@ define zeroext i16 @test_vpcmpsgeq_v4i1_
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -35075,36 +34766,33 @@ define zeroext i16 @test_vpcmpsgeq_v4i1_
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -35154,36 +34842,33 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -35236,36 +34921,33 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -35301,36 +34983,33 @@ define zeroext i16 @test_vpcmpsgeq_v4i1_
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -35382,36 +35061,33 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -41579,39 +41255,35 @@ define zeroext i8 @test_vpcmpultd_v4i1_v
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -41637,39 +41309,35 @@ define zeroext i8 @test_vpcmpultd_v4i1_v
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -41715,39 +41383,35 @@ define zeroext i8 @test_masked_vpcmpultd
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -41795,39 +41459,35 @@ define zeroext i8 @test_masked_vpcmpultd
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -41859,39 +41519,35 @@ define zeroext i8 @test_vpcmpultd_v4i1_v
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -41939,39 +41595,35 @@ define zeroext i8 @test_masked_vpcmpultd
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -42003,39 +41655,35 @@ define zeroext i16 @test_vpcmpultd_v4i1_
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -42061,39 +41709,35 @@ define zeroext i16 @test_vpcmpultd_v4i1_
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -42139,39 +41783,35 @@ define zeroext i16 @test_masked_vpcmpult
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -42219,39 +41859,35 @@ define zeroext i16 @test_masked_vpcmpult
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -42283,39 +41919,35 @@ define zeroext i16 @test_vpcmpultd_v4i1_
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -42363,39 +41995,35 @@ define zeroext i16 @test_masked_vpcmpult
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -45983,23 +45611,21 @@ define zeroext i8 @test_vpcmpultq_v2i1_v
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -46025,23 +45651,21 @@ define zeroext i8 @test_vpcmpultq_v2i1_v
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -46079,23 +45703,21 @@ define zeroext i8 @test_masked_vpcmpultq
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -46135,23 +45757,21 @@ define zeroext i8 @test_masked_vpcmpultq
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -46183,23 +45803,21 @@ define zeroext i8 @test_vpcmpultq_v2i1_v
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -46239,23 +45857,21 @@ define zeroext i8 @test_masked_vpcmpultq
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -46287,23 +45903,21 @@ define zeroext i16 @test_vpcmpultq_v2i1_
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -46329,23 +45943,21 @@ define zeroext i16 @test_vpcmpultq_v2i1_
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -46383,23 +45995,21 @@ define zeroext i16 @test_masked_vpcmpult
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -46439,23 +46049,21 @@ define zeroext i16 @test_masked_vpcmpult
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -46487,23 +46095,21 @@ define zeroext i16 @test_vpcmpultq_v2i1_
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -46543,23 +46149,21 @@ define zeroext i16 @test_masked_vpcmpult
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -47261,36 +46865,33 @@ define zeroext i8 @test_vpcmpultq_v4i1_v
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -47321,36 +46922,33 @@ define zeroext i8 @test_vpcmpultq_v4i1_v
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -47401,36 +46999,33 @@ define zeroext i8 @test_masked_vpcmpultq
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -47483,36 +47078,33 @@ define zeroext i8 @test_masked_vpcmpultq
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -47549,36 +47141,33 @@ define zeroext i8 @test_vpcmpultq_v4i1_v
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -47631,36 +47220,33 @@ define zeroext i8 @test_masked_vpcmpultq
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -47697,36 +47283,33 @@ define zeroext i16 @test_vpcmpultq_v4i1_
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -47757,36 +47340,33 @@ define zeroext i16 @test_vpcmpultq_v4i1_
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -47837,36 +47417,33 @@ define zeroext i16 @test_masked_vpcmpult
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -47919,36 +47496,33 @@ define zeroext i16 @test_masked_vpcmpult
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -47985,36 +47559,33 @@ define zeroext i16 @test_vpcmpultq_v4i1_
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -48067,36 +47638,33 @@ define zeroext i16 @test_masked_vpcmpult
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -49961,39 +49529,35 @@ define zeroext i8 @test_vcmpoeqps_v4i1_v
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
@@ -50016,39 +49580,35 @@ define zeroext i8 @test_vcmpoeqps_v4i1_v
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
@@ -50073,39 +49633,35 @@ define zeroext i8 @test_vcmpoeqps_v4i1_v
; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
@@ -50137,36 +49693,33 @@ define zeroext i8 @test_masked_vcmpoeqps
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -50201,36 +49754,33 @@ define zeroext i8 @test_masked_vcmpoeqps
; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -50267,36 +49817,33 @@ define zeroext i8 @test_masked_vcmpoeqps
; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -50328,39 +49875,35 @@ define zeroext i16 @test_vcmpoeqps_v4i1_
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
@@ -50383,39 +49926,35 @@ define zeroext i16 @test_vcmpoeqps_v4i1_
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
@@ -50440,39 +49979,35 @@ define zeroext i16 @test_vcmpoeqps_v4i1_
; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
@@ -50504,36 +50039,33 @@ define zeroext i16 @test_masked_vcmpoeqp
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -50568,36 +50100,33 @@ define zeroext i16 @test_masked_vcmpoeqp
; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -50634,36 +50163,33 @@ define zeroext i16 @test_masked_vcmpoeqp
; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -54198,23 +53724,21 @@ define zeroext i8 @test_vcmpoeqpd_v2i1_v
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
@@ -54237,23 +53761,21 @@ define zeroext i8 @test_vcmpoeqpd_v2i1_v
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
@@ -54278,23 +53800,21 @@ define zeroext i8 @test_vcmpoeqpd_v2i1_v
; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
@@ -54326,20 +53846,19 @@ define zeroext i8 @test_masked_vcmpoeqpd
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -54374,20 +53893,19 @@ define zeroext i8 @test_masked_vcmpoeqpd
; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -54424,20 +53942,19 @@ define zeroext i8 @test_masked_vcmpoeqpd
; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -54469,23 +53986,21 @@ define zeroext i16 @test_vcmpoeqpd_v2i1_
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
@@ -54508,23 +54023,21 @@ define zeroext i16 @test_vcmpoeqpd_v2i1_
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
@@ -54549,23 +54062,21 @@ define zeroext i16 @test_vcmpoeqpd_v2i1_
; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
-; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
@@ -54597,20 +54108,19 @@ define zeroext i16 @test_masked_vcmpoeqp
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -54645,20 +54155,19 @@ define zeroext i16 @test_masked_vcmpoeqp
; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -54695,20 +54204,19 @@ define zeroext i16 @test_masked_vcmpoeqp
; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %eax, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -55344,36 +54852,33 @@ define zeroext i8 @test_vcmpoeqpd_v4i1_v
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -55401,36 +54906,33 @@ define zeroext i8 @test_vcmpoeqpd_v4i1_v
; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -55460,36 +54962,33 @@ define zeroext i8 @test_vcmpoeqpd_v4i1_v
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -55526,36 +55025,33 @@ define zeroext i8 @test_masked_vcmpoeqpd
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -55592,36 +55088,33 @@ define zeroext i8 @test_masked_vcmpoeqpd
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -55660,36 +55153,33 @@ define zeroext i8 @test_masked_vcmpoeqpd
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
-; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %al killed %al killed %eax
; NoVLX-NEXT: vzeroupper
@@ -55723,36 +55213,33 @@ define zeroext i16 @test_vcmpoeqpd_v4i1_
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -55780,36 +55267,33 @@ define zeroext i16 @test_vcmpoeqpd_v4i1_
; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -55839,36 +55323,33 @@ define zeroext i16 @test_vcmpoeqpd_v4i1_
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -55905,36 +55386,33 @@ define zeroext i16 @test_masked_vcmpoeqp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -55971,36 +55449,33 @@ define zeroext i16 @test_masked_vcmpoeqp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper
@@ -56039,36 +55514,33 @@ define zeroext i16 @test_masked_vcmpoeqp
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
-; NoVLX-NEXT: kmovw %eax, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kshiftrw $1, %k1, %k1
-; NoVLX-NEXT: kshiftlw $1, %k1, %k1
-; NoVLX-NEXT: korw %k0, %k1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
-; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
-; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k2
+; NoVLX-NEXT: kshiftrw $1, %k2, %k2
+; NoVLX-NEXT: kshiftlw $1, %k2, %k2
+; NoVLX-NEXT: korw %k1, %k2, %k1
+; NoVLX-NEXT: kshiftrw $1, %k1, %k2
+; NoVLX-NEXT: kxorw %k0, %k2, %k0
+; NoVLX-NEXT: kshiftlw $15, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kxorw %k1, %k0, %k0
+; NoVLX-NEXT: kshiftrw $2, %k0, %k1
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $13, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1
+; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
+; NoVLX-NEXT: kmovw %eax, %k2
+; NoVLX-NEXT: kxorw %k2, %k1, %k1
+; NoVLX-NEXT: kshiftlw $15, %k1, %k1
+; NoVLX-NEXT: kshiftrw $12, %k1, %k1
+; NoVLX-NEXT: kxorw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax
; NoVLX-NEXT: vzeroupper