[llvm] r320674 - [SelectionDAG][X86] Improve legalization of v32i1 CONCAT_VECTORS of v16i1 for AVX512F.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 14 00:25:58 PST 2017
Author: ctopper
Date: Thu Dec 14 00:25:58 2017
New Revision: 320674
URL: http://llvm.org/viewvc/llvm-project?rev=320674&view=rev
Log:
[SelectionDAG][X86] Improve legalization of v32i1 CONCAT_VECTORS of v16i1 for AVX512F.
A v32i1 CONCAT_VECTORS of v16i1 uses promotion to v32i8 to legalize the v32i1. This results in a bunch of extract_vector_elts and a build_vector that ultimately gets scalarized.
This patch checks to see if v16i8 is legal and, if so, inserts an any_extend of each v16i1 operand to v16i8 so that we can concatenate the v16i8 vectors into a v32i8 and avoid creating the extracts.
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll
llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-512.ll
llvm/trunk/test/CodeGen/X86/bitcast-setcc-512.ll
llvm/trunk/test/CodeGen/X86/vector-compare-results.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp?rev=320674&r1=320673&r2=320674&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp Thu Dec 14 00:25:58 2017
@@ -3497,6 +3497,21 @@ SDValue DAGTypeLegalizer::PromoteIntRes_
assert(NumElem * NumOperands == NumOutElem &&
"Unexpected number of elements");
+ // If the input type is legal and we can promote it to a legal type with the
+ // same element size, go ahead do that to create a new concat.
+ if (getTypeAction(N->getOperand(0).getValueType()) ==
+ TargetLowering::TypeLegal) {
+ EVT InPromotedTy = EVT::getVectorVT(*DAG.getContext(), OutElemTy, NumElem);
+ if (TLI.isTypeLegal(InPromotedTy)) {
+ SmallVector<SDValue, 8> Ops(NumOperands);
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, InPromotedTy,
+ N->getOperand(i));
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, NOutVT, Ops);
+ }
+ }
+
// Take the elements from the first vector.
SmallVector<SDValue, 8> Ops(NumOutElem);
for (unsigned i = 0; i < NumOperands; ++i) {
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=320674&r1=320673&r2=320674&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Thu Dec 14 00:25:58 2017
@@ -1802,138 +1802,137 @@ define void @ktest_2(<32 x float> %in, f
; KNL-NEXT: subq $32, %rsp
; KNL-NEXT: vmovups (%rdi), %zmm2
; KNL-NEXT: vmovups 64(%rdi), %zmm3
-; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k1
+; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k1
; KNL-NEXT: kshiftlw $14, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $15, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: vmovd %ecx, %xmm3
-; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
+; KNL-NEXT: vmovd %ecx, %xmm2
+; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $13, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $12, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $11, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $10, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $9, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $8, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $7, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $6, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $5, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $4, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $3, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $2, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $1, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftrw $15, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k2
+; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k2
; KNL-NEXT: kshiftlw $14, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $15, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: vmovd %ecx, %xmm2
-; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; KNL-NEXT: vmovd %ecx, %xmm3
+; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $13, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $12, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $11, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $10, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $9, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $8, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $7, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $6, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $5, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $4, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $3, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $2, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $1, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftrw $15, %k2, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
-; KNL-NEXT: vmovups 4(%rdi), %zmm3 {%k2} {z}
-; KNL-NEXT: vmovups 68(%rdi), %zmm4 {%k1} {z}
-; KNL-NEXT: vcmpltps %zmm4, %zmm1, %k0
+; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
+; KNL-NEXT: vmovups 68(%rdi), %zmm5 {%k2} {z}
+; KNL-NEXT: vmovups 4(%rdi), %zmm4 {%k1} {z}
+; KNL-NEXT: vcmpltps %zmm4, %zmm0, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
@@ -1997,77 +1996,76 @@ define void @ktest_2(<32 x float> %in, f
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4
-; KNL-NEXT: vcmpltps %zmm3, %zmm0, %k0
+; KNL-NEXT: vcmpltps %zmm5, %zmm1, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vmovd %ecx, %xmm3
-; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
+; KNL-NEXT: vmovd %ecx, %xmm5
+; KNL-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
+; KNL-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; KNL-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
-; KNL-NEXT: vpor %ymm3, %ymm2, %ymm2
-; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3
+; KNL-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5
+; KNL-NEXT: vpor %xmm5, %xmm3, %xmm3
; KNL-NEXT: vpmovsxbd %xmm3, %zmm3
; KNL-NEXT: vpslld $31, %zmm3, %zmm3
; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; KNL-NEXT: vpor %xmm4, %xmm2, %xmm2
; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
@@ -2150,138 +2148,137 @@ define void @ktest_2(<32 x float> %in, f
; AVX512DQ-NEXT: subq $32, %rsp
; AVX512DQ-NEXT: vmovups (%rdi), %zmm2
; AVX512DQ-NEXT: vmovups 64(%rdi), %zmm3
-; AVX512DQ-NEXT: vcmpltps %zmm1, %zmm3, %k1
+; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k1
; AVX512DQ-NEXT: kshiftlw $14, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: kshiftlw $15, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm3
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vmovd %ecx, %xmm2
+; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $13, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $12, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $11, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $10, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $9, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $8, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $7, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $6, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $5, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $4, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $3, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $2, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftlw $1, %k1, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k2
+; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vcmpltps %zmm1, %zmm3, %k2
; AVX512DQ-NEXT: kshiftlw $14, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: kshiftlw $15, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm2
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vmovd %ecx, %xmm3
+; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $13, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $12, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $11, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $10, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $9, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $8, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $7, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $6, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $5, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $4, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $3, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $2, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftlw $1, %k2, %k0
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: kshiftrw $15, %k2, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
-; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm3 {%k2} {z}
-; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm4 {%k1} {z}
-; AVX512DQ-NEXT: vcmpltps %zmm4, %zmm1, %k0
+; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm5 {%k2} {z}
+; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm4 {%k1} {z}
+; AVX512DQ-NEXT: vcmpltps %zmm4, %zmm0, %k0
; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
@@ -2345,77 +2342,76 @@ define void @ktest_2(<32 x float> %in, f
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4
-; AVX512DQ-NEXT: vcmpltps %zmm3, %zmm0, %k0
+; AVX512DQ-NEXT: vcmpltps %zmm5, %zmm1, %k0
; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm3
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vmovd %ecx, %xmm5
+; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1
; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpor %ymm3, %ymm2, %ymm2
-; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5
+; AVX512DQ-NEXT: vpor %xmm5, %xmm3, %xmm3
; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3
; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3
; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0
; AVX512DQ-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; AVX512DQ-NEXT: vpor %xmm4, %xmm2, %xmm2
; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2
; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2
; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0
@@ -2909,310 +2905,22 @@ define void @store_64i1(<64 x i1>* %a, <
;
; KNL-LABEL: store_64i1:
; KNL: ## %bb.0:
-; KNL-NEXT: pushq %rbp
-; KNL-NEXT: .cfi_def_cfa_offset 16
-; KNL-NEXT: pushq %r15
-; KNL-NEXT: .cfi_def_cfa_offset 24
-; KNL-NEXT: pushq %r14
-; KNL-NEXT: .cfi_def_cfa_offset 32
-; KNL-NEXT: pushq %r13
-; KNL-NEXT: .cfi_def_cfa_offset 40
-; KNL-NEXT: pushq %r12
-; KNL-NEXT: .cfi_def_cfa_offset 48
-; KNL-NEXT: pushq %rbx
-; KNL-NEXT: .cfi_def_cfa_offset 56
-; KNL-NEXT: .cfi_offset %rbx, -56
-; KNL-NEXT: .cfi_offset %r12, -48
-; KNL-NEXT: .cfi_offset %r13, -40
-; KNL-NEXT: .cfi_offset %r14, -32
-; KNL-NEXT: .cfi_offset %r15, -24
-; KNL-NEXT: .cfi_offset %rbp, -16
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vpslld $31, %zmm0, %zmm0
-; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
-; KNL-NEXT: vpslld $31, %zmm1, %zmm1
-; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
-; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vpmovsxbd %xmm3, %zmm3
; KNL-NEXT: vpslld $31, %zmm3, %zmm3
; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
-; KNL-NEXT: kshiftlw $14, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r8d
-; KNL-NEXT: kshiftlw $15, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r9d
-; KNL-NEXT: kshiftlw $13, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r10d
-; KNL-NEXT: kshiftlw $12, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r11d
-; KNL-NEXT: kshiftlw $11, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r14d
-; KNL-NEXT: kshiftlw $10, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r15d
-; KNL-NEXT: kshiftlw $9, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r12d
-; KNL-NEXT: kshiftlw $8, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r13d
-; KNL-NEXT: kshiftlw $7, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ebx
-; KNL-NEXT: kshiftlw $6, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ebp
-; KNL-NEXT: kshiftlw $5, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: kshiftlw $4, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: kshiftlw $3, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %edx
-; KNL-NEXT: kshiftlw $2, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %esi
-; KNL-NEXT: kshiftlw $1, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vmovd %r9d, %xmm3
-; KNL-NEXT: kmovw %k1, %r9d
-; KNL-NEXT: vptestmd %zmm2, %zmm2, %k2
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $1, %r8d, %xmm3, %xmm2
-; KNL-NEXT: vpinsrb $2, %r10d, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $4, %r14d, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $5, %r15d, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $6, %r12d, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $7, %r13d, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $8, %ebx, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $9, %ebp, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $13, %esi, %xmm2, %xmm2
-; KNL-NEXT: vpinsrb $14, %r9d, %xmm2, %xmm2
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; KNL-NEXT: kmovw %k0, 6(%rdi)
; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
-; KNL-NEXT: kmovw %k0, 6(%rdi)
-; KNL-NEXT: kshiftlw $14, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r8d
-; KNL-NEXT: kshiftlw $15, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r10d
-; KNL-NEXT: kshiftlw $13, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r9d
-; KNL-NEXT: kshiftlw $12, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r11d
-; KNL-NEXT: kshiftlw $11, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r14d
-; KNL-NEXT: kshiftlw $10, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r15d
-; KNL-NEXT: kshiftlw $9, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r12d
-; KNL-NEXT: kshiftlw $8, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r13d
-; KNL-NEXT: kshiftlw $7, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: kshiftlw $6, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %esi
-; KNL-NEXT: kshiftlw $5, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %ebp
-; KNL-NEXT: kshiftlw $4, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %ebx
-; KNL-NEXT: kshiftlw $3, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: kshiftlw $2, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %edx
-; KNL-NEXT: kshiftlw $1, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vmovd %r10d, %xmm2
-; KNL-NEXT: kmovw %k0, %r10d
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
-; KNL-NEXT: kshiftrw $15, %k2, %k0
-; KNL-NEXT: vpinsrb $1, %r8d, %xmm2, %xmm1
-; KNL-NEXT: vpinsrb $2, %r9d, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $3, %r11d, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $4, %r14d, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $5, %r15d, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $6, %r12d, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $7, %r13d, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $9, %esi, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $10, %ebp, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $11, %ebx, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $13, %edx, %xmm1, %xmm1
-; KNL-NEXT: vpinsrb $14, %r10d, %xmm1, %xmm1
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; KNL-NEXT: kmovw %k0, 4(%rdi)
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, 4(%rdi)
-; KNL-NEXT: kshiftlw $14, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r8d
-; KNL-NEXT: kshiftlw $15, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r10d
-; KNL-NEXT: kshiftlw $13, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r9d
-; KNL-NEXT: kshiftlw $12, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r11d
-; KNL-NEXT: kshiftlw $11, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r14d
-; KNL-NEXT: kshiftlw $10, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r15d
-; KNL-NEXT: kshiftlw $9, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r12d
-; KNL-NEXT: kshiftlw $8, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %r13d
-; KNL-NEXT: kshiftlw $7, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: kshiftlw $6, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %esi
-; KNL-NEXT: kshiftlw $5, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %ebp
-; KNL-NEXT: kshiftlw $4, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %ebx
-; KNL-NEXT: kshiftlw $3, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: kshiftlw $2, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %edx
-; KNL-NEXT: kshiftlw $1, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vmovd %r10d, %xmm1
-; KNL-NEXT: kmovw %k0, %r10d
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm0
-; KNL-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $14, %r10d, %xmm0, %xmm0
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vpslld $31, %zmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
-; KNL-NEXT: kmovw %k1, 2(%rdi)
-; KNL-NEXT: kshiftlw $14, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r8d
-; KNL-NEXT: kshiftlw $15, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r9d
-; KNL-NEXT: kshiftlw $13, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r10d
-; KNL-NEXT: kshiftlw $12, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r11d
-; KNL-NEXT: kshiftlw $11, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r14d
-; KNL-NEXT: kshiftlw $10, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r15d
-; KNL-NEXT: kshiftlw $9, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r12d
-; KNL-NEXT: kshiftlw $8, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %r13d
-; KNL-NEXT: kshiftlw $7, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %edx
-; KNL-NEXT: kshiftlw $6, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %esi
-; KNL-NEXT: kshiftlw $5, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ebp
-; KNL-NEXT: kshiftlw $4, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ebx
-; KNL-NEXT: kshiftlw $3, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: kshiftlw $2, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: kshiftlw $1, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: vmovd %r9d, %xmm0
-; KNL-NEXT: kmovw %k1, %r9d
-; KNL-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $2, %r10d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
-; KNL-NEXT: vpinsrb $14, %r9d, %xmm0, %xmm0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; KNL-NEXT: kmovw %k0, 2(%rdi)
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, (%rdi)
-; KNL-NEXT: popq %rbx
-; KNL-NEXT: popq %r12
-; KNL-NEXT: popq %r13
-; KNL-NEXT: popq %r14
-; KNL-NEXT: popq %r15
-; KNL-NEXT: popq %rbp
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
@@ -3234,310 +2942,22 @@ define void @store_64i1(<64 x i1>* %a, <
;
; AVX512DQ-LABEL: store_64i1:
; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: pushq %rbp
-; AVX512DQ-NEXT: .cfi_def_cfa_offset 16
-; AVX512DQ-NEXT: pushq %r15
-; AVX512DQ-NEXT: .cfi_def_cfa_offset 24
-; AVX512DQ-NEXT: pushq %r14
-; AVX512DQ-NEXT: .cfi_def_cfa_offset 32
-; AVX512DQ-NEXT: pushq %r13
-; AVX512DQ-NEXT: .cfi_def_cfa_offset 40
-; AVX512DQ-NEXT: pushq %r12
-; AVX512DQ-NEXT: .cfi_def_cfa_offset 48
-; AVX512DQ-NEXT: pushq %rbx
-; AVX512DQ-NEXT: .cfi_def_cfa_offset 56
-; AVX512DQ-NEXT: .cfi_offset %rbx, -56
-; AVX512DQ-NEXT: .cfi_offset %r12, -48
-; AVX512DQ-NEXT: .cfi_offset %r13, -40
-; AVX512DQ-NEXT: .cfi_offset %r14, -32
-; AVX512DQ-NEXT: .cfi_offset %r15, -24
-; AVX512DQ-NEXT: .cfi_offset %rbp, -16
-; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1
-; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1
-; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2
-; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2
; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3
; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3
; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0
-; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r8d
-; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r9d
-; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r10d
-; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r11d
-; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r14d
-; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r15d
-; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r12d
-; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r13d
-; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ebx
-; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ebp
-; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ecx
-; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %edx
-; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %esi
-; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: vmovd %r9d, %xmm3
-; AVX512DQ-NEXT: kmovw %k1, %r9d
-; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k2
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: vpinsrb $1, %r8d, %xmm3, %xmm2
-; AVX512DQ-NEXT: vpinsrb $2, %r10d, %xmm2, %xmm2
-; AVX512DQ-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2
-; AVX512DQ-NEXT: vpinsrb $4, %r14d, %xmm2, %xmm2
-; AVX512DQ-NEXT: vpinsrb $5, %r15d, %xmm2, %xmm2
-; AVX512DQ-NEXT: vpinsrb $6, %r12d, %xmm2, %xmm2
-; AVX512DQ-NEXT: vpinsrb $7, %r13d, %xmm2, %xmm2
-; AVX512DQ-NEXT: vpinsrb $8, %ebx, %xmm2, %xmm2
-; AVX512DQ-NEXT: vpinsrb $9, %ebp, %xmm2, %xmm2
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2
-; AVX512DQ-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2
-; AVX512DQ-NEXT: vpinsrb $13, %esi, %xmm2, %xmm2
-; AVX512DQ-NEXT: vpinsrb $14, %r9d, %xmm2, %xmm2
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: kmovw %k0, 6(%rdi)
; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2
; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2
; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0
-; AVX512DQ-NEXT: kmovw %k0, 6(%rdi)
-; AVX512DQ-NEXT: kshiftlw $14, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r8d
-; AVX512DQ-NEXT: kshiftlw $15, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r10d
-; AVX512DQ-NEXT: kshiftlw $13, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r9d
-; AVX512DQ-NEXT: kshiftlw $12, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r11d
-; AVX512DQ-NEXT: kshiftlw $11, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r14d
-; AVX512DQ-NEXT: kshiftlw $10, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r15d
-; AVX512DQ-NEXT: kshiftlw $9, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r12d
-; AVX512DQ-NEXT: kshiftlw $8, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r13d
-; AVX512DQ-NEXT: kshiftlw $7, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %ecx
-; AVX512DQ-NEXT: kshiftlw $6, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %esi
-; AVX512DQ-NEXT: kshiftlw $5, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %ebp
-; AVX512DQ-NEXT: kshiftlw $4, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %ebx
-; AVX512DQ-NEXT: kshiftlw $3, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: kshiftlw $2, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %edx
-; AVX512DQ-NEXT: kshiftlw $1, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: vmovd %r10d, %xmm2
-; AVX512DQ-NEXT: kmovw %k0, %r10d
-; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k2, %k0
-; AVX512DQ-NEXT: vpinsrb $1, %r8d, %xmm2, %xmm1
-; AVX512DQ-NEXT: vpinsrb $2, %r9d, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpinsrb $3, %r11d, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpinsrb $4, %r14d, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpinsrb $5, %r15d, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpinsrb $6, %r12d, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpinsrb $7, %r13d, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpinsrb $9, %esi, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpinsrb $10, %ebp, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpinsrb $11, %ebx, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpinsrb $13, %edx, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpinsrb $14, %r10d, %xmm1, %xmm1
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; AVX512DQ-NEXT: kmovw %k0, 4(%rdi)
; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1
; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512DQ-NEXT: kmovw %k0, 4(%rdi)
-; AVX512DQ-NEXT: kshiftlw $14, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r8d
-; AVX512DQ-NEXT: kshiftlw $15, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r10d
-; AVX512DQ-NEXT: kshiftlw $13, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r9d
-; AVX512DQ-NEXT: kshiftlw $12, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r11d
-; AVX512DQ-NEXT: kshiftlw $11, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r14d
-; AVX512DQ-NEXT: kshiftlw $10, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r15d
-; AVX512DQ-NEXT: kshiftlw $9, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r12d
-; AVX512DQ-NEXT: kshiftlw $8, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %r13d
-; AVX512DQ-NEXT: kshiftlw $7, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %ecx
-; AVX512DQ-NEXT: kshiftlw $6, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %esi
-; AVX512DQ-NEXT: kshiftlw $5, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %ebp
-; AVX512DQ-NEXT: kshiftlw $4, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %ebx
-; AVX512DQ-NEXT: kshiftlw $3, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: kshiftlw $2, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %edx
-; AVX512DQ-NEXT: kshiftlw $1, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: vmovd %r10d, %xmm1
-; AVX512DQ-NEXT: kmovw %k0, %r10d
-; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm0
-; AVX512DQ-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $14, %r10d, %xmm0, %xmm0
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k1
-; AVX512DQ-NEXT: kmovw %k1, 2(%rdi)
-; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r8d
-; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r9d
-; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r10d
-; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r11d
-; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r14d
-; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r15d
-; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r12d
-; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %r13d
-; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %edx
-; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %esi
-; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ebp
-; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ebx
-; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ecx
-; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: vmovd %r9d, %xmm0
-; AVX512DQ-NEXT: kmovw %k1, %r9d
-; AVX512DQ-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $2, %r10d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpinsrb $14, %r9d, %xmm0, %xmm0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; AVX512DQ-NEXT: kmovw %k0, 2(%rdi)
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512DQ-NEXT: kmovw %k0, (%rdi)
-; AVX512DQ-NEXT: popq %rbx
-; AVX512DQ-NEXT: popq %r12
-; AVX512DQ-NEXT: popq %r13
-; AVX512DQ-NEXT: popq %r14
-; AVX512DQ-NEXT: popq %r15
-; AVX512DQ-NEXT: popq %rbp
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
store <64 x i1> %v, <64 x i1>* %a
Modified: llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll?rev=320674&r1=320673&r2=320674&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll Thu Dec 14 00:25:58 2017
@@ -355,284 +355,28 @@ define i64 @test12_v64i16(<64 x i16> %a,
; KNL-NEXT: subq $64, %rsp
; KNL-NEXT: vpcmpeqw %ymm5, %ymm1, %ymm1
; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
-; KNL-NEXT: vpslld $31, %zmm1, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kshiftlw $14, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: kshiftlw $15, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vmovd %ecx, %xmm1
-; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $13, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $12, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $11, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $10, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $9, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $8, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $7, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $6, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $5, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $4, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $3, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $2, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $1, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; KNL-NEXT: vpmovdb %zmm1, %xmm1
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
-; KNL-NEXT: vpslld $31, %zmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kshiftlw $14, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: kshiftlw $15, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vmovd %ecx, %xmm0
-; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $13, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $12, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $11, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $10, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $9, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $8, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $7, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $6, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $5, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $4, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $3, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $2, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $1, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, (%rsp)
; KNL-NEXT: vpcmpeqw %ymm7, %ymm3, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
-; KNL-NEXT: vpslld $31, %zmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kshiftlw $14, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: kshiftlw $15, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vmovd %ecx, %xmm0
-; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $13, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $12, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $11, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $10, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $9, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $8, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $7, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $6, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $5, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $4, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $3, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $2, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $1, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vpcmpeqw %ymm6, %ymm2, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
-; KNL-NEXT: vpslld $31, %zmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kshiftlw $14, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: kshiftlw $15, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vmovd %ecx, %xmm0
-; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $13, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $12, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $11, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $10, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $9, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $8, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $7, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $6, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $5, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $4, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $3, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $2, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $1, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll?rev=320674&r1=320673&r2=320674&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll Thu Dec 14 00:25:58 2017
@@ -16,98 +16,17 @@ define zeroext i32 @test_vpcmpeqb_v16i1_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -134,98 +53,17 @@ define zeroext i32 @test_vpcmpeqb_v16i1_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -499,103 +337,22 @@ define zeroext i64 @test_vpcmpeqb_v16i1_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -622,103 +379,22 @@ define zeroext i64 @test_vpcmpeqb_v16i1_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -1963,98 +1639,19 @@ define zeroext i32 @test_vpcmpeqw_v16i1_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -2082,6 +1679,48 @@ define zeroext i32 @test_vpcmpeqw_v16i1_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i64> %__a to <16 x i16>
+ %load = load <4 x i64>, <4 x i64>* %__b
+ %1 = bitcast <4 x i64> %load to <16 x i16>
+ %2 = icmp eq <16 x i16> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
+; VLX: # %bb.0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
+; NoVLX: # %bb.0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
@@ -2094,10 +1733,11 @@ define zeroext i32 @test_vpcmpeqw_v16i1_
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
@@ -2179,24 +1819,25 @@ define zeroext i32 @test_vpcmpeqw_v16i1_
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
- %load = load <4 x i64>, <4 x i64>* %__b
- %1 = bitcast <4 x i64> %load to <16 x i16>
+ %1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp eq <16 x i16> %0, %1
- %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- %4 = bitcast <32 x i1> %3 to i32
- ret i32 %4
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
}
-define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
+define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
+; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
-; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
+; VLX-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
-; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
+; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
@@ -2215,130 +1856,7 @@ define zeroext i32 @test_masked_vpcmpeqw
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
-; NoVLX-NEXT: popq %rbp
-; NoVLX-NEXT: vzeroupper
-; NoVLX-NEXT: retq
-entry:
- %0 = bitcast <4 x i64> %__a to <16 x i16>
- %1 = bitcast <4 x i64> %__b to <16 x i16>
- %2 = icmp eq <16 x i16> %0, %1
- %3 = bitcast i16 %__u to <16 x i1>
- %4 = and <16 x i1> %2, %3
- %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- %6 = bitcast <32 x i1> %5 to i32
- ret i32 %6
-}
-
-define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
-; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
-; VLX: # %bb.0: # %entry
-; VLX-NEXT: kmovd %edi, %k1
-; VLX-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
-; VLX-NEXT: kmovd %k0, %eax
-; VLX-NEXT: vzeroupper
-; VLX-NEXT: retq
-;
-; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
-; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
@@ -2450,89 +1968,15 @@ define zeroext i64 @test_vpcmpeqw_v16i1_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -2541,12 +1985,7 @@ define zeroext i64 @test_vpcmpeqw_v16i1_
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -2574,89 +2013,15 @@ define zeroext i64 @test_vpcmpeqw_v16i1_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -2665,12 +2030,7 @@ define zeroext i64 @test_vpcmpeqw_v16i1_
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -3132,141 +2492,13 @@ define zeroext i64 @test_vpcmpeqw_v32i1_
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -3395,143 +2627,15 @@ define zeroext i64 @test_vpcmpeqw_v32i1_
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %eax, %xmm1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
@@ -3731,15 +2835,15 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm3
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
+; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1
+; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
@@ -3752,145 +2856,17 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm4, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
-; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -3999,174 +2975,46 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm5
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
+; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm3
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm1, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm2, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm3, %ymm3
-; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
-; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpand %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -12251,6 +11099,82 @@ define zeroext i32 @test_vpcmpsgtb_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %1 = bitcast <2 x i64> %__b to <16 x i8>
+ %2 = icmp sgt <16 x i8> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
+; VLX: # %bb.0: # %entry
+; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
+; NoVLX: # %bb.0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <16 x i8>
+ %2 = icmp sgt <16 x i8> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
+; VLX: # %bb.0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
+; NoVLX: # %bb.0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
@@ -12266,7 +11190,8 @@ define zeroext i32 @test_vpcmpsgtb_v16i1
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
@@ -12350,19 +11275,22 @@ entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp sgt <16 x i8> %0, %1
- %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- %4 = bitcast <32 x i1> %3 to i32
- ret i32 %4
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
}
-define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
+define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
-; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
-; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
+; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
@@ -12381,10 +11309,11 @@ define zeroext i32 @test_vpcmpsgtb_v16i1
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
@@ -12469,20 +11398,108 @@ entry:
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp sgt <16 x i8> %0, %1
- %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- %4 = bitcast <32 x i1> %3 to i32
- ret i32 %4
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
}
-define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
+
+define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
+; VLX: # %bb.0: # %entry
+; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
+; NoVLX: # %bb.0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %1 = bitcast <2 x i64> %__b to <16 x i8>
+ %2 = icmp sgt <16 x i8> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
+; VLX: # %bb.0: # %entry
+; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
+; NoVLX: # %bb.0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <16 x i8>
+ %2 = icmp sgt <16 x i8> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
-; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
-; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
+; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
@@ -12495,7 +11512,7 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .cfi_offset %r13, -40
@@ -12508,6 +11525,8 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -12575,7 +11594,10 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
@@ -12591,20 +11613,20 @@ entry:
%2 = icmp sgt <16 x i8> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
- %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- %6 = bitcast <32 x i1> %5 to i32
- ret i32 %6
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
}
-define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
+define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
-; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
-; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
+; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
@@ -12617,7 +11639,7 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .cfi_offset %r13, -40
@@ -12630,6 +11652,8 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -12697,7 +11721,10 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
@@ -12714,538 +11741,35 @@ entry:
%2 = icmp sgt <16 x i8> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
- %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- %6 = bitcast <32 x i1> %5 to i32
- ret i32 %6
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
}
-define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
+define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
+; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
; VLX: # %bb.0: # %entry
-; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
+; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
-; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
+; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
-; NoVLX-NEXT: popq %rbp
-; NoVLX-NEXT: vzeroupper
-; NoVLX-NEXT: retq
-entry:
- %0 = bitcast <2 x i64> %__a to <16 x i8>
- %1 = bitcast <2 x i64> %__b to <16 x i8>
- %2 = icmp sgt <16 x i8> %0, %1
- %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- %4 = bitcast <64 x i1> %3 to i64
- ret i64 %4
-}
-
-define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
-; VLX: # %bb.0: # %entry
-; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
-; VLX-NEXT: kmovq %k0, %rax
-; VLX-NEXT: retq
-;
-; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
-; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
-; NoVLX-NEXT: popq %rbp
-; NoVLX-NEXT: vzeroupper
-; NoVLX-NEXT: retq
-entry:
- %0 = bitcast <2 x i64> %__a to <16 x i8>
- %load = load <2 x i64>, <2 x i64>* %__b
- %1 = bitcast <2 x i64> %load to <16 x i8>
- %2 = icmp sgt <16 x i8> %0, %1
- %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- %4 = bitcast <64 x i1> %3 to i64
- ret i64 %4
-}
-
-define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
-; VLX: # %bb.0: # %entry
-; VLX-NEXT: kmovd %edi, %k1
-; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
-; VLX-NEXT: kmovq %k0, %rax
-; VLX-NEXT: retq
-;
-; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
-; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
-; NoVLX-NEXT: popq %rbp
-; NoVLX-NEXT: vzeroupper
-; NoVLX-NEXT: retq
-entry:
- %0 = bitcast <2 x i64> %__a to <16 x i8>
- %1 = bitcast <2 x i64> %__b to <16 x i8>
- %2 = icmp sgt <16 x i8> %0, %1
- %3 = bitcast i16 %__u to <16 x i1>
- %4 = and <16 x i1> %2, %3
- %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- %6 = bitcast <64 x i1> %5 to i64
- ret i64 %6
-}
-
-define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
-; VLX: # %bb.0: # %entry
-; VLX-NEXT: kmovd %edi, %k1
-; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
-; VLX-NEXT: kmovq %k0, %rax
-; VLX-NEXT: retq
-;
-; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
-; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
-; NoVLX-NEXT: popq %rbp
-; NoVLX-NEXT: vzeroupper
-; NoVLX-NEXT: retq
-entry:
- %0 = bitcast <2 x i64> %__a to <16 x i8>
- %load = load <2 x i64>, <2 x i64>* %__b
- %1 = bitcast <2 x i64> %load to <16 x i8>
- %2 = icmp sgt <16 x i8> %0, %1
- %3 = bitcast i16 %__u to <16 x i1>
- %4 = and <16 x i1> %2, %3
- %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- %6 = bitcast <64 x i1> %5 to i64
- ret i64 %6
-}
-
-
-define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
-; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
-; VLX: # %bb.0: # %entry
-; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0
-; VLX-NEXT: kmovq %k0, %rax
-; VLX-NEXT: vzeroupper
-; VLX-NEXT: retq
-;
-; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
-; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -14198,98 +12722,19 @@ define zeroext i32 @test_vpcmpsgtw_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -14317,98 +12762,19 @@ define zeroext i32 @test_vpcmpsgtw_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -14685,89 +13051,15 @@ define zeroext i64 @test_vpcmpsgtw_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -14776,12 +13068,7 @@ define zeroext i64 @test_vpcmpsgtw_v16i1
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -14809,89 +13096,15 @@ define zeroext i64 @test_vpcmpsgtw_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -14900,12 +13113,7 @@ define zeroext i64 @test_vpcmpsgtw_v16i1
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -15367,141 +13575,13 @@ define zeroext i64 @test_vpcmpsgtw_v32i1
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -15630,143 +13710,15 @@ define zeroext i64 @test_vpcmpsgtw_v32i1
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw 32(%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %eax, %xmm1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
@@ -15966,15 +13918,15 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm3
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
+; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
@@ -15987,145 +13939,17 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm4, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
-; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -16234,174 +14058,46 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm5
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
+; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm3
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm1, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm2, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm3, %ymm3
-; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
-; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpand %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -24486,6 +22182,87 @@ define zeroext i32 @test_vpcmpsgeb_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %1 = bitcast <2 x i64> %__b to <16 x i8>
+ %2 = icmp sge <16 x i8> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
+; VLX: # %bb.0: # %entry
+; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
+; NoVLX: # %bb.0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <16 x i8>
+ %2 = icmp sge <16 x i8> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
+; VLX: # %bb.0: # %entry
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
+; NoVLX: # %bb.0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
@@ -24503,7 +22280,8 @@ define zeroext i32 @test_vpcmpsgeb_v16i1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
@@ -24587,19 +22365,22 @@ entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp sge <16 x i8> %0, %1
- %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- %4 = bitcast <32 x i1> %3 to i32
- ret i32 %4
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
}
-define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
+define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
-; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
-; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
+; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
@@ -24618,13 +22399,14 @@ define zeroext i32 @test_vpcmpsgeb_v16i1
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
+; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
@@ -24709,20 +22491,113 @@ entry:
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp sge <16 x i8> %0, %1
- %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- %4 = bitcast <32 x i1> %3 to i32
- ret i32 %4
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
}
-define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
+
+define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
+; VLX: # %bb.0: # %entry
+; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
+; NoVLX: # %bb.0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %1 = bitcast <2 x i64> %__b to <16 x i8>
+ %2 = icmp sge <16 x i8> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
+; VLX: # %bb.0: # %entry
+; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
+; NoVLX: # %bb.0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <16 x i8>
+ %2 = icmp sge <16 x i8> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0 {%k1}
-; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
-; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
+; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
@@ -24735,7 +22610,7 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .cfi_offset %r13, -40
@@ -24750,6 +22625,8 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -24817,7 +22694,10 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
@@ -24833,20 +22713,20 @@ entry:
%2 = icmp sge <16 x i8> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
- %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- %6 = bitcast <32 x i1> %5 to i32
- ret i32 %6
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <64 x i1> %5 to i64
+ ret i64 %6
}
-define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
+define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
-; VLX-NEXT: kmovd %k0, %eax
+; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
-; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
+; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
@@ -24859,7 +22739,7 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .cfi_offset %r13, -40
@@ -24875,6 +22755,8 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -24942,520 +22824,10 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
-; NoVLX-NEXT: popq %rbp
-; NoVLX-NEXT: vzeroupper
-; NoVLX-NEXT: retq
-entry:
- %0 = bitcast <2 x i64> %__a to <16 x i8>
- %load = load <2 x i64>, <2 x i64>* %__b
- %1 = bitcast <2 x i64> %load to <16 x i8>
- %2 = icmp sge <16 x i8> %0, %1
- %3 = bitcast i16 %__u to <16 x i1>
- %4 = and <16 x i1> %2, %3
- %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- %6 = bitcast <32 x i1> %5 to i32
- ret i32 %6
-}
-
-
-define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
-; VLX: # %bb.0: # %entry
-; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0
-; VLX-NEXT: kmovq %k0, %rax
-; VLX-NEXT: retq
-;
-; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
-; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
-; NoVLX-NEXT: popq %rbp
-; NoVLX-NEXT: vzeroupper
-; NoVLX-NEXT: retq
-entry:
- %0 = bitcast <2 x i64> %__a to <16 x i8>
- %1 = bitcast <2 x i64> %__b to <16 x i8>
- %2 = icmp sge <16 x i8> %0, %1
- %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- %4 = bitcast <64 x i1> %3 to i64
- ret i64 %4
-}
-
-define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
-; VLX: # %bb.0: # %entry
-; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0
-; VLX-NEXT: kmovq %k0, %rax
-; VLX-NEXT: retq
-;
-; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
-; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
-; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
-; NoVLX-NEXT: popq %rbp
-; NoVLX-NEXT: vzeroupper
-; NoVLX-NEXT: retq
-entry:
- %0 = bitcast <2 x i64> %__a to <16 x i8>
- %load = load <2 x i64>, <2 x i64>* %__b
- %1 = bitcast <2 x i64> %load to <16 x i8>
- %2 = icmp sge <16 x i8> %0, %1
- %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- %4 = bitcast <64 x i1> %3 to i64
- ret i64 %4
-}
-
-define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
-; VLX: # %bb.0: # %entry
-; VLX-NEXT: kmovd %edi, %k1
-; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0 {%k1}
-; VLX-NEXT: kmovq %k0, %rax
-; VLX-NEXT: retq
-;
-; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
-; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
-; NoVLX-NEXT: popq %rbp
-; NoVLX-NEXT: vzeroupper
-; NoVLX-NEXT: retq
-entry:
- %0 = bitcast <2 x i64> %__a to <16 x i8>
- %1 = bitcast <2 x i64> %__b to <16 x i8>
- %2 = icmp sge <16 x i8> %0, %1
- %3 = bitcast i16 %__u to <16 x i1>
- %4 = and <16 x i1> %2, %3
- %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- %6 = bitcast <64 x i1> %5 to i64
- ret i64 %6
-}
-
-define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
-; VLX: # %bb.0: # %entry
-; VLX-NEXT: kmovd %edi, %k1
-; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
-; VLX-NEXT: kmovq %k0, %rax
-; VLX-NEXT: retq
-;
-; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
-; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
-; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
@@ -26493,100 +23865,21 @@ define zeroext i32 @test_vpcmpsgew_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -26614,101 +23907,22 @@ define zeroext i32 @test_vpcmpsgew_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -26990,91 +24204,17 @@ define zeroext i64 @test_vpcmpsgew_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -27083,12 +24223,7 @@ define zeroext i64 @test_vpcmpsgew_v16i1
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -27116,92 +24251,18 @@ define zeroext i64 @test_vpcmpsgew_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -27210,12 +24271,7 @@ define zeroext i64 @test_vpcmpsgew_v16i1
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -27680,146 +24736,18 @@ define zeroext i64 @test_vpcmpsgew_v32i1
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm2
-; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -27949,146 +24877,18 @@ define zeroext i64 @test_vpcmpsgew_v32i1
; NoVLX-NEXT: vmovdqa (%rdi), %ymm2
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1
+; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -28289,15 +25089,15 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm3
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
+; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
@@ -28310,148 +25110,20 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm4, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4
-; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3
-; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
-; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -28508,231 +25180,103 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm4
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3
-; NoVLX-NEXT: vmovdqa (%rsi), %ymm4
-; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm4, %ymm5
-; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm3
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm3
-; NoVLX-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4
-; NoVLX-NEXT: vpxor %ymm4, %ymm5, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3
-; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
-; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm3, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
+; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
+; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm3
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm3, %ymm0
+; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm3
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
+; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
+; NoVLX-NEXT: vpxor %ymm3, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %ymm3, %ymm1, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -36945,18 +33489,10 @@ define zeroext i32 @test_vpcmpultb_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
@@ -36964,82 +33500,9 @@ define zeroext i32 @test_vpcmpultb_v16i1
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -37066,350 +33529,20 @@ define zeroext i32 @test_vpcmpultb_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
-; NoVLX-NEXT: popq %rbp
-; NoVLX-NEXT: vzeroupper
-; NoVLX-NEXT: retq
-entry:
- %0 = bitcast <2 x i64> %__a to <16 x i8>
- %load = load <2 x i64>, <2 x i64>* %__b
- %1 = bitcast <2 x i64> %load to <16 x i8>
- %2 = icmp ult <16 x i8> %0, %1
- %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- %4 = bitcast <32 x i1> %3 to i32
- ret i32 %4
-}
-
-define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
-; VLX: # %bb.0: # %entry
-; VLX-NEXT: kmovd %edi, %k1
-; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1}
-; VLX-NEXT: kmovd %k0, %eax
-; VLX-NEXT: retq
-;
-; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
-; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
-; NoVLX-NEXT: popq %rbp
-; NoVLX-NEXT: vzeroupper
-; NoVLX-NEXT: retq
-entry:
- %0 = bitcast <2 x i64> %__a to <16 x i8>
- %1 = bitcast <2 x i64> %__b to <16 x i8>
- %2 = icmp ult <16 x i8> %0, %1
- %3 = bitcast i16 %__u to <16 x i1>
- %4 = and <16 x i1> %2, %3
- %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- %6 = bitcast <32 x i1> %5 to i32
- ret i32 %6
-}
-
-define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
-; VLX: # %bb.0: # %entry
-; VLX-NEXT: kmovd %edi, %k1
-; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1}
-; VLX-NEXT: kmovd %k0, %eax
-; VLX-NEXT: retq
-;
-; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
-; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
-; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -37418,22 +33551,20 @@ entry:
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp ult <16 x i8> %0, %1
- %3 = bitcast i16 %__u to <16 x i1>
- %4 = and <16 x i1> %2, %3
- %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- %6 = bitcast <32 x i1> %5 to i32
- ret i32 %6
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
}
-
-define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
-; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
+define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
-; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0
-; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
-; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
+; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
@@ -37446,7 +33577,7 @@ define zeroext i64 @test_vpcmpultb_v16i1
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .cfi_offset %r13, -40
@@ -37458,11 +33589,10 @@ define zeroext i64 @test_vpcmpultb_v16i1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -37530,10 +33660,7 @@ define zeroext i64 @test_vpcmpultb_v16i1
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
@@ -37547,19 +33674,22 @@ entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp ult <16 x i8> %0, %1
- %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- %4 = bitcast <64 x i1> %3 to i64
- ret i64 %4
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
}
-define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
-; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
+define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
-; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0
-; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: kmovd %edi, %k1
+; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1}
+; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
-; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
+; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .cfi_def_cfa_offset 16
@@ -37572,7 +33702,7 @@ define zeroext i64 @test_vpcmpultb_v16i1
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .cfi_offset %r13, -40
@@ -37580,15 +33710,14 @@ define zeroext i64 @test_vpcmpultb_v16i1
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
+; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -37656,10 +33785,7 @@ define zeroext i64 @test_vpcmpultb_v16i1
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
@@ -37674,6 +33800,100 @@ entry:
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp ult <16 x i8> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %2, %3
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
+; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
+; VLX: # %bb.0: # %entry
+; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
+; NoVLX: # %bb.0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %1 = bitcast <2 x i64> %__b to <16 x i8>
+ %2 = icmp ult <16 x i8> %0, %1
+ %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
+define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
+; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
+; VLX: # %bb.0: # %entry
+; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
+; NoVLX: # %bb.0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
+; NoVLX-NEXT: shlq $32, %rcx
+; NoVLX-NEXT: movl (%rsp), %eax
+; NoVLX-NEXT: orq %rcx, %rax
+; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
+; NoVLX-NEXT: retq
+entry:
+ %0 = bitcast <2 x i64> %__a to <16 x i8>
+ %load = load <2 x i64>, <2 x i64>* %__b
+ %1 = bitcast <2 x i64> %load to <16 x i8>
+ %2 = icmp ult <16 x i8> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
@@ -38964,101 +35184,22 @@ define zeroext i32 @test_vpcmpultw_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -39086,101 +35227,22 @@ define zeroext i32 @test_vpcmpultw_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -39463,92 +35525,18 @@ define zeroext i64 @test_vpcmpultw_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -39557,12 +35545,7 @@ define zeroext i64 @test_vpcmpultw_v16i1
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -39589,93 +35572,19 @@ define zeroext i64 @test_vpcmpultw_v16i1
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
-; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -39684,12 +35593,7 @@ define zeroext i64 @test_vpcmpultw_v16i1
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -40153,150 +36057,22 @@ define zeroext i64 @test_vpcmpultw_v32i1
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm4
-; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
-; NoVLX-NEXT: vpxor %ymm2, %ymm3, %ymm3
-; NoVLX-NEXT: vpxor %ymm2, %ymm4, %ymm4
-; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm4, %ymm3
-; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
-; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpmovsxbd %xmm3, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
-; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3
+; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
+; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
-; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm4, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm4, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -40422,150 +36198,22 @@ define zeroext i64 @test_vpcmpultw_v32i1
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm2
-; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
-; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2
-; NoVLX-NEXT: vpxor 32(%rdi), %ymm1, %ymm3
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor 32(%rdi), %ymm2, %ymm3
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor (%rdi), %ymm2, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -40765,15 +36413,15 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm8
+; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm6
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm4
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
+; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
+; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
+; NoVLX-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm6, %xmm6
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
@@ -40787,150 +36435,22 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm5 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
-; NoVLX-NEXT: vpxor %ymm5, %ymm6, %ymm3
+; NoVLX-NEXT: vpxor %ymm5, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm5, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm3
-; NoVLX-NEXT: vpxor %ymm5, %ymm8, %ymm2
-; NoVLX-NEXT: vpxor %ymm5, %ymm4, %ymm4
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm4, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpand %xmm1, %xmm2, %xmm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %ymm5, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm5, %ymm3, %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm6, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm3, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -40999,219 +36519,91 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm1
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm1
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm5
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm1
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm6
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
+; NoVLX-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm5, %xmm5
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor (%rsi), %ymm2, %ymm3
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm3, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor 32(%rsi), %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm3
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm6, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
-; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
-; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2
-; NoVLX-NEXT: vpxor (%rsi), %ymm4, %ymm5
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm5, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3
-; NoVLX-NEXT: vpxor 32(%rsi), %ymm4, %ymm4
-; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm4, %ymm3
-; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
-; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpand %xmm5, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
Modified: llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-512.ll?rev=320674&r1=320673&r2=320674&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-512.ll Thu Dec 14 00:25:58 2017
@@ -287,278 +287,20 @@ define i32 @v32i16(<32 x i16> %a, <32 x
; AVX512F-NEXT: .cfi_def_cfa_register %rbp
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $32, %rsp
-; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
-; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm1
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $8, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $7, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $6, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $5, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $4, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $3, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $2, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $1, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
-; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm0
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $8, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $7, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $6, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $5, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $4, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $3, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $2, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $1, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm1
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
-; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm1
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $8, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $7, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $6, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $5, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $4, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $3, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $2, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $1, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vpcmpgtw %ymm6, %ymm4, %ymm2
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
-; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
-; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm2
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $8, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $7, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $6, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $5, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $4, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $3, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $2, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $1, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
+; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm2
+; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
+; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
+; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
Modified: llvm/trunk/test/CodeGen/X86/bitcast-setcc-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-setcc-512.ll?rev=320674&r1=320673&r2=320674&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-setcc-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitcast-setcc-512.ll Thu Dec 14 00:25:58 2017
@@ -60,142 +60,14 @@ define i32 @v32i16(<32 x i16> %a, <32 x
; AVX512F-NEXT: subq $32, %rsp
; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
-; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm1
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $8, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $7, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $6, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $5, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $4, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $3, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $2, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $1, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
-; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm0
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $8, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $7, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $6, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $5, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $4, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $3, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $2, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $1, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
Modified: llvm/trunk/test/CodeGen/X86/vector-compare-results.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-compare-results.ll?rev=320674&r1=320673&r2=320674&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-compare-results.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-compare-results.ll Thu Dec 14 00:25:58 2017
@@ -4259,280 +4259,24 @@ define <64 x i1> @test_cmp_v64i16(<64 x
;
; AVX512F-LABEL: test_cmp_v64i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3
-; AVX512F-NEXT: vpmovsxwd %ymm3, %zmm3
-; AVX512F-NEXT: vpslld $31, %zmm3, %zmm3
-; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm3
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $8, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $7, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $6, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $5, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $4, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $3, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $2, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftlw $1, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
; AVX512F-NEXT: vpcmpgtw %ymm6, %ymm2, %ymm2
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
-; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
-; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm2
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $8, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $7, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $6, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $5, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $4, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $3, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $2, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $1, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
+; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3
+; AVX512F-NEXT: vpmovsxwd %ymm3, %zmm3
+; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512F-NEXT: vpsllw $7, %ymm2, %ymm2
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6
; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm6, %ymm2
-; AVX512F-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1
-; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
-; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm1
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $8, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $7, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $6, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $5, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $4, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $3, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $2, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $1, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpgtw %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
-; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm0
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $8, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $7, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $6, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $5, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $4, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $3, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $2, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $1, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1
+; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
+; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0
@@ -4546,280 +4290,24 @@ define <64 x i1> @test_cmp_v64i16(<64 x
;
; AVX512DQ-LABEL: test_cmp_v64i16:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpmovsxwd %ymm3, %zmm3
-; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3
-; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0
-; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm3
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
; AVX512DQ-NEXT: vpcmpgtw %ymm6, %ymm2, %ymm2
; AVX512DQ-NEXT: vpmovsxwd %ymm2, %zmm2
-; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2
-; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0
-; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm2
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
+; AVX512DQ-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpmovsxwd %ymm3, %zmm3
+; AVX512DQ-NEXT: vpmovdb %zmm3, %xmm3
; AVX512DQ-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; AVX512DQ-NEXT: vpsllw $7, %ymm2, %ymm2
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT: vpxor %xmm6, %xmm6, %xmm6
; AVX512DQ-NEXT: vpcmpgtb %ymm2, %ymm6, %ymm2
-; AVX512DQ-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm1
-; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1
-; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm1
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX512DQ-NEXT: vpcmpgtw %ymm4, %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
-; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm0
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm1
+; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512DQ-NEXT: vpand %ymm3, %ymm0, %ymm0
More information about the llvm-commits
mailing list