[llvm] r320675 - [X86] Make ANY_EXTEND from vXi1 Custom for more types.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 14 00:26:01 PST 2017
Author: ctopper
Date: Thu Dec 14 00:26:00 2017
New Revision: 320675
URL: http://llvm.org/viewvc/llvm-project?rev=320675&view=rev
Log:
[X86] Make ANY_EXTEND from vXi1 Custom for more types.
We should be able to support ANY_EXTEND for any types we support ZERO_EXTEND for.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll
llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
llvm/trunk/test/CodeGen/X86/vector-compare-results.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=320675&r1=320674&r2=320675&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Dec 14 00:26:00 2017
@@ -1241,6 +1241,8 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Custom);
}
setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
@@ -1254,8 +1256,10 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i8, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v16i8, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i16, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v8i16, Custom);
for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
setOperationAction(ISD::FFLOOR, VT, Legal);
@@ -1446,6 +1450,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::SELECT, MVT::v64i1, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
@@ -1453,6 +1458,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
Modified: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll?rev=320675&r1=320674&r2=320675&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll Thu Dec 14 00:26:00 2017
@@ -800,134 +800,13 @@ define i32 @test_insertelement_v32i1(i32
; KNL-NEXT: xorl %eax, %eax
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: setb %al
-; KNL-NEXT: vpcmpltud %zmm3, %zmm1, %k0
-; KNL-NEXT: kshiftlw $14, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: kshiftlw $15, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %edx
-; KNL-NEXT: vmovd %edx, %xmm1
-; KNL-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $13, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $12, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $11, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $10, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $9, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $8, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $7, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $6, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $5, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $4, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $3, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $2, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $1, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: vpinsrb $15, %ecx, %xmm1, %xmm1
-; KNL-NEXT: vpcmpltud %zmm2, %zmm0, %k0
-; KNL-NEXT: kshiftlw $14, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: kshiftlw $15, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %edx
-; KNL-NEXT: vmovd %edx, %xmm0
-; KNL-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $13, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $12, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $11, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $10, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $9, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $8, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $7, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $6, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $5, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $4, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $3, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $2, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $1, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0
+; KNL-NEXT: vpcmpltud %zmm2, %zmm0, %k1
+; KNL-NEXT: movl {{.*}}(%rip), %ecx
+; KNL-NEXT: vpbroadcastd %ecx, %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
+; KNL-NEXT: vpcmpltud %zmm3, %zmm1, %k1
+; KNL-NEXT: vpbroadcastd %ecx, %zmm1 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm1, %xmm1
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=320675&r1=320674&r2=320675&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Thu Dec 14 00:26:00 2017
@@ -1803,269 +1803,26 @@ define void @ktest_2(<32 x float> %in, f
; KNL-NEXT: vmovups (%rdi), %zmm2
; KNL-NEXT: vmovups 64(%rdi), %zmm3
; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k1
-; KNL-NEXT: kshiftlw $14, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: kshiftlw $15, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: vmovd %ecx, %xmm2
-; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $13, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $12, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $11, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $10, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $9, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $8, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $7, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $6, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $5, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $4, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $3, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $2, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftlw $1, %k1, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; KNL-NEXT: kshiftrw $15, %k1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; KNL-NEXT: movl {{.*}}(%rip), %eax
+; KNL-NEXT: vpbroadcastd %eax, %zmm2 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm2, %xmm2
; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k2
-; KNL-NEXT: kshiftlw $14, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: kshiftlw $15, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: vmovd %ecx, %xmm3
-; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; KNL-NEXT: kshiftlw $13, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; KNL-NEXT: kshiftlw $12, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; KNL-NEXT: kshiftlw $11, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; KNL-NEXT: kshiftlw $10, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; KNL-NEXT: kshiftlw $9, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; KNL-NEXT: kshiftlw $8, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; KNL-NEXT: kshiftlw $7, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; KNL-NEXT: kshiftlw $6, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; KNL-NEXT: kshiftlw $5, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; KNL-NEXT: kshiftlw $4, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; KNL-NEXT: kshiftlw $3, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; KNL-NEXT: kshiftlw $2, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; KNL-NEXT: kshiftlw $1, %k2, %k0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; KNL-NEXT: kshiftrw $15, %k2, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; KNL-NEXT: vmovups 68(%rdi), %zmm5 {%k2} {z}
-; KNL-NEXT: vmovups 4(%rdi), %zmm4 {%k1} {z}
-; KNL-NEXT: vcmpltps %zmm4, %zmm0, %k0
-; KNL-NEXT: kshiftlw $14, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: kshiftlw $15, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vmovd %ecx, %xmm4
-; KNL-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $13, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $12, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $11, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $10, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $9, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $8, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $7, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $6, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $5, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $4, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $3, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $2, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftlw $1, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4
-; KNL-NEXT: vcmpltps %zmm5, %zmm1, %k0
-; KNL-NEXT: kshiftlw $14, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: kshiftlw $15, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vmovd %ecx, %xmm5
-; KNL-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
-; KNL-NEXT: kshiftlw $13, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5
-; KNL-NEXT: kshiftlw $12, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5
-; KNL-NEXT: kshiftlw $11, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5
-; KNL-NEXT: kshiftlw $10, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5
-; KNL-NEXT: kshiftlw $9, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5
-; KNL-NEXT: kshiftlw $8, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
-; KNL-NEXT: kshiftlw $7, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5
-; KNL-NEXT: kshiftlw $6, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5
-; KNL-NEXT: kshiftlw $5, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
-; KNL-NEXT: kshiftlw $4, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5
-; KNL-NEXT: kshiftlw $3, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
-; KNL-NEXT: kshiftlw $2, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5
-; KNL-NEXT: kshiftlw $1, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5
-; KNL-NEXT: vpor %xmm5, %xmm3, %xmm3
+; KNL-NEXT: vpbroadcastd %eax, %zmm3 {%k2} {z}
+; KNL-NEXT: vpmovdb %zmm3, %xmm3
+; KNL-NEXT: vmovups 68(%rdi), %zmm4 {%k2} {z}
+; KNL-NEXT: vmovups 4(%rdi), %zmm5 {%k1} {z}
+; KNL-NEXT: vcmpltps %zmm5, %zmm0, %k1
+; KNL-NEXT: vpbroadcastd %eax, %zmm5 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm5, %xmm5
+; KNL-NEXT: vpor %xmm5, %xmm2, %xmm2
+; KNL-NEXT: vcmpltps %zmm4, %zmm1, %k1
+; KNL-NEXT: vpbroadcastd %eax, %zmm4 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm4, %xmm4
+; KNL-NEXT: vpor %xmm4, %xmm3, %xmm3
; KNL-NEXT: vpmovsxbd %xmm3, %zmm3
; KNL-NEXT: vpslld $31, %zmm3, %zmm3
; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; KNL-NEXT: vpor %xmm4, %xmm2, %xmm2
; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
@@ -2149,269 +1906,26 @@ define void @ktest_2(<32 x float> %in, f
; AVX512DQ-NEXT: vmovups (%rdi), %zmm2
; AVX512DQ-NEXT: vmovups 64(%rdi), %zmm3
; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k1
-; AVX512DQ-NEXT: kshiftlw $14, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: kshiftlw $15, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm2
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $13, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $12, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $11, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $10, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $9, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $8, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $7, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $6, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $5, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $4, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $3, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $2, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlw $1, %k1, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; AVX512DQ-NEXT: movl {{.*}}(%rip), %eax
+; AVX512DQ-NEXT: vpbroadcastd %eax, %zmm2 {%k1} {z}
+; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2
; AVX512DQ-NEXT: vcmpltps %zmm1, %zmm3, %k2
-; AVX512DQ-NEXT: kshiftlw $14, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: kshiftlw $15, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm3
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $13, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $12, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $11, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $10, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $9, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $8, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $7, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $6, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $5, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $4, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $3, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $2, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftlw $1, %k2, %k0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: kshiftrw $15, %k2, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
-; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm5 {%k2} {z}
-; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm4 {%k1} {z}
-; AVX512DQ-NEXT: vcmpltps %zmm4, %zmm0, %k0
-; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm4
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
-; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
-; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
-; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
-; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
-; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
-; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
-; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
-; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
-; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
-; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
-; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
-; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
-; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4
-; AVX512DQ-NEXT: vcmpltps %zmm5, %zmm1, %k0
-; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm5
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm5, %xmm5
-; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm5, %xmm5
-; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm5, %xmm5
-; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm5, %xmm5
-; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm5, %xmm5
-; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm5, %xmm5
-; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
-; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm5, %xmm5
-; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm5, %xmm5
-; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm5, %xmm5
-; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm5, %xmm5
-; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm5, %xmm5
-; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm5, %xmm5
-; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm5, %xmm5
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm5, %xmm5
-; AVX512DQ-NEXT: vpor %xmm5, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpbroadcastd %eax, %zmm3 {%k2} {z}
+; AVX512DQ-NEXT: vpmovdb %zmm3, %xmm3
+; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm4 {%k2} {z}
+; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm5 {%k1} {z}
+; AVX512DQ-NEXT: vcmpltps %zmm5, %zmm0, %k1
+; AVX512DQ-NEXT: vpbroadcastd %eax, %zmm5 {%k1} {z}
+; AVX512DQ-NEXT: vpmovdb %zmm5, %xmm5
+; AVX512DQ-NEXT: vpor %xmm5, %xmm2, %xmm2
+; AVX512DQ-NEXT: vcmpltps %zmm4, %zmm1, %k1
+; AVX512DQ-NEXT: vpbroadcastd %eax, %zmm4 {%k1} {z}
+; AVX512DQ-NEXT: vpmovdb %zmm4, %xmm4
+; AVX512DQ-NEXT: vpor %xmm4, %xmm3, %xmm3
; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3
; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3
; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0
; AVX512DQ-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; AVX512DQ-NEXT: vpor %xmm4, %xmm2, %xmm2
; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2
; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2
; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0
Modified: llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll?rev=320675&r1=320674&r2=320675&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll Thu Dec 14 00:26:00 2017
@@ -191,138 +191,17 @@ define i32 @test12_v32i32(<32 x i32> %a,
; KNL-NEXT: movq %rsp, %rbp
; KNL-NEXT: andq $-32, %rsp
; KNL-NEXT: subq $32, %rsp
-; KNL-NEXT: vpcmpeqd %zmm3, %zmm1, %k0
-; KNL-NEXT: kshiftlw $14, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: kshiftlw $15, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vmovd %ecx, %xmm1
-; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $13, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $12, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $11, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $10, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $9, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $8, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $7, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $6, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $5, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $4, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $3, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $2, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftlw $1, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; KNL-NEXT: vpcmpeqd %zmm3, %zmm1, %k1
+; KNL-NEXT: movl {{.*}}(%rip), %eax
+; KNL-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm1, %xmm1
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; KNL-NEXT: vpcmpeqd %zmm2, %zmm0, %k0
-; KNL-NEXT: kshiftlw $14, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: kshiftlw $15, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vmovd %ecx, %xmm0
-; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $13, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $12, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $11, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $10, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $9, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $8, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $7, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $6, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $5, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $4, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $3, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $2, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftlw $1, %k0, %k1
-; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; KNL-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
+; KNL-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
+; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll?rev=320675&r1=320674&r2=320675&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll Thu Dec 14 00:26:00 2017
@@ -92,99 +92,23 @@ define zeroext i32 @test_masked_vpcmpeqb
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -214,99 +138,23 @@ define zeroext i32 @test_masked_vpcmpeqb
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -423,90 +271,19 @@ define zeroext i64 @test_masked_vpcmpeqb
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -515,12 +292,7 @@ define zeroext i64 @test_masked_vpcmpeqb
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -550,90 +322,19 @@ define zeroext i64 @test_masked_vpcmpeqb
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -642,12 +343,7 @@ define zeroext i64 @test_masked_vpcmpeqb
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -1721,99 +1417,23 @@ define zeroext i32 @test_masked_vpcmpeqw
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -1844,99 +1464,23 @@ define zeroext i32 @test_masked_vpcmpeqw
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -2060,90 +1604,19 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -2152,12 +1625,7 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -2188,90 +1656,19 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -2280,12 +1677,7 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -5642,95 +5034,19 @@ define zeroext i32 @test_vpcmpeqd_v16i1_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -5758,95 +5074,19 @@ define zeroext i32 @test_vpcmpeqd_v16i1_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -5876,96 +5116,20 @@ define zeroext i32 @test_masked_vpcmpeqd
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -5996,96 +5160,20 @@ define zeroext i32 @test_masked_vpcmpeqd
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -6117,95 +5205,19 @@ define zeroext i32 @test_vpcmpeqd_v16i1_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -6236,96 +5248,20 @@ define zeroext i32 @test_masked_vpcmpeqd
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -6358,86 +5294,15 @@ define zeroext i64 @test_vpcmpeqd_v16i1_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -6446,12 +5311,7 @@ define zeroext i64 @test_vpcmpeqd_v16i1_
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -6479,86 +5339,15 @@ define zeroext i64 @test_vpcmpeqd_v16i1_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -6567,12 +5356,7 @@ define zeroext i64 @test_vpcmpeqd_v16i1_
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -6602,87 +5386,16 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -6691,12 +5404,7 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -6727,87 +5435,16 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -6816,12 +5453,7 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -6853,86 +5485,15 @@ define zeroext i64 @test_vpcmpeqd_v16i1_
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -6941,12 +5502,7 @@ define zeroext i64 @test_vpcmpeqd_v16i1_
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -6977,87 +5533,16 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -7066,12 +5551,7 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -11175,99 +9655,23 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -11297,99 +9701,23 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -11506,90 +9834,19 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -11598,12 +9855,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -11633,90 +9885,19 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -11725,12 +9906,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -12804,99 +10980,23 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -12927,99 +11027,23 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -13143,90 +11167,19 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -13235,12 +11188,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -13271,90 +11219,19 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -13363,12 +11240,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -16725,95 +14597,19 @@ define zeroext i32 @test_vpcmpsgtd_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -16841,95 +14637,19 @@ define zeroext i32 @test_vpcmpsgtd_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -16959,96 +14679,20 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -17079,96 +14723,20 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -17200,95 +14768,19 @@ define zeroext i32 @test_vpcmpsgtd_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -17319,96 +14811,20 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -17441,86 +14857,15 @@ define zeroext i64 @test_vpcmpsgtd_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -17529,12 +14874,7 @@ define zeroext i64 @test_vpcmpsgtd_v16i1
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -17562,86 +14902,15 @@ define zeroext i64 @test_vpcmpsgtd_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -17650,12 +14919,7 @@ define zeroext i64 @test_vpcmpsgtd_v16i1
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -17685,87 +14949,16 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -17774,12 +14967,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -17810,87 +14998,16 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -17899,12 +15016,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -17936,86 +15048,15 @@ define zeroext i64 @test_vpcmpsgtd_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -18024,12 +15065,7 @@ define zeroext i64 @test_vpcmpsgtd_v16i1
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -18060,87 +15096,16 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -18149,12 +15114,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -22263,101 +19223,25 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -22387,18 +19271,8 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
@@ -22406,83 +19280,17 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -22593,103 +19401,32 @@ define zeroext i64 @test_masked_vpcmpsge
; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
-; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0 {%k1}
-; VLX-NEXT: kmovq %k0, %rax
-; VLX-NEXT: retq
-;
-; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
-; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0 {%k1}
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
+; NoVLX: # %bb.0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -22698,12 +19435,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -22733,18 +19465,8 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
@@ -22752,74 +19474,13 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -22828,12 +19489,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -23952,101 +20608,25 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -24077,18 +20657,8 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
@@ -24096,83 +20666,17 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -24301,92 +20805,21 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -24395,12 +20828,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -24431,18 +20859,8 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
@@ -24450,74 +20868,13 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -24526,12 +20883,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -27936,95 +24288,19 @@ define zeroext i32 @test_vpcmpsged_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -28052,95 +24328,19 @@ define zeroext i32 @test_vpcmpsged_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -28170,96 +24370,20 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -28290,96 +24414,20 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -28411,95 +24459,19 @@ define zeroext i32 @test_vpcmpsged_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -28530,208 +24502,61 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
- %0 = bitcast <8 x i64> %__a to <16 x i32>
- %load = load i32, i32* %__b
- %vec = insertelement <16 x i32> undef, i32 %load, i32 0
- %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %2 = icmp sge <16 x i32> %0, %1
- %3 = bitcast i16 %__u to <16 x i1>
- %4 = and <16 x i1> %3, %2
- %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
- %6 = bitcast <32 x i1> %5 to i32
- ret i32 %6
-}
-
-
-define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
-; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
-; VLX: # %bb.0: # %entry
-; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
-; VLX-NEXT: kmovq %k0, %rax
-; VLX-NEXT: vzeroupper
-; VLX-NEXT: retq
-;
-; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
-; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+ %0 = bitcast <8 x i64> %__a to <16 x i32>
+ %load = load i32, i32* %__b
+ %vec = insertelement <16 x i32> undef, i32 %load, i32 0
+ %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = icmp sge <16 x i32> %0, %1
+ %3 = bitcast i16 %__u to <16 x i1>
+ %4 = and <16 x i1> %3, %2
+ %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ %6 = bitcast <32 x i1> %5 to i32
+ ret i32 %6
+}
+
+
+define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
+; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
+; VLX: # %bb.0: # %entry
+; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; VLX-NEXT: kmovq %k0, %rax
+; VLX-NEXT: vzeroupper
+; VLX-NEXT: retq
+;
+; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
+; NoVLX: # %bb.0: # %entry
+; NoVLX-NEXT: pushq %rbp
+; NoVLX-NEXT: .cfi_def_cfa_offset 16
+; NoVLX-NEXT: .cfi_offset %rbp, -16
+; NoVLX-NEXT: movq %rsp, %rbp
+; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: andq $-32, %rsp
+; NoVLX-NEXT: subq $64, %rsp
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -28740,12 +24565,7 @@ define zeroext i64 @test_vpcmpsged_v16i1
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -28773,86 +24593,15 @@ define zeroext i64 @test_vpcmpsged_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -28861,12 +24610,7 @@ define zeroext i64 @test_vpcmpsged_v16i1
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -28896,87 +24640,16 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -28985,12 +24658,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -29021,87 +24689,16 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -29110,12 +24707,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -29147,86 +24739,15 @@ define zeroext i64 @test_vpcmpsged_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -29235,12 +24756,7 @@ define zeroext i64 @test_vpcmpsged_v16i1
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -29271,87 +24787,16 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -29360,12 +24805,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -33571,18 +29011,8 @@ define zeroext i32 @test_masked_vpcmpult
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
@@ -33590,83 +29020,17 @@ define zeroext i32 @test_masked_vpcmpult
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -33696,18 +29060,8 @@ define zeroext i32 @test_masked_vpcmpult
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
@@ -33715,83 +29069,17 @@ define zeroext i32 @test_masked_vpcmpult
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -33914,18 +29202,8 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
@@ -33933,74 +29211,13 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -34009,12 +29226,7 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -34044,18 +29256,8 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
@@ -34063,74 +29265,13 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -34139,12 +29280,7 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -35272,18 +30408,8 @@ define zeroext i32 @test_masked_vpcmpult
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
@@ -35291,83 +30417,17 @@ define zeroext i32 @test_masked_vpcmpult
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -35398,18 +30458,8 @@ define zeroext i32 @test_masked_vpcmpult
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
@@ -35417,83 +30467,17 @@ define zeroext i32 @test_masked_vpcmpult
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -35623,18 +30607,8 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
@@ -35642,74 +30616,13 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -35718,12 +30631,7 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -35754,18 +30662,8 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
@@ -35773,74 +30671,13 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -35849,12 +30686,7 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -39303,95 +34135,19 @@ define zeroext i32 @test_vpcmpultd_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -39419,95 +34175,19 @@ define zeroext i32 @test_vpcmpultd_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -39537,96 +34217,20 @@ define zeroext i32 @test_masked_vpcmpult
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -39657,96 +34261,20 @@ define zeroext i32 @test_masked_vpcmpult
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -39778,95 +34306,19 @@ define zeroext i32 @test_vpcmpultd_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -39897,96 +34349,20 @@ define zeroext i32 @test_masked_vpcmpult
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -40019,86 +34395,15 @@ define zeroext i64 @test_vpcmpultd_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -40107,12 +34412,7 @@ define zeroext i64 @test_vpcmpultd_v16i1
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -40140,86 +34440,15 @@ define zeroext i64 @test_vpcmpultd_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -40228,12 +34457,7 @@ define zeroext i64 @test_vpcmpultd_v16i1
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -40263,87 +34487,16 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -40352,12 +34505,7 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -40388,87 +34536,16 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -40477,12 +34554,7 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -40514,86 +34586,15 @@ define zeroext i64 @test_vpcmpultd_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -40602,12 +34603,7 @@ define zeroext i64 @test_vpcmpultd_v16i1
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -40638,87 +34634,16 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -40727,12 +34652,7 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -47358,95 +41278,19 @@ define zeroext i32 @test_vcmpoeqps_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -47474,95 +41318,19 @@ define zeroext i32 @test_vcmpoeqps_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -47591,95 +41359,19 @@ define zeroext i32 @test_vcmpoeqps_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -47710,96 +41402,20 @@ define zeroext i32 @test_masked_vcmpoeqp
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -47830,96 +41446,20 @@ define zeroext i32 @test_masked_vcmpoeqp
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -47951,96 +41491,20 @@ define zeroext i32 @test_masked_vcmpoeqp
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -48115,86 +41579,15 @@ define zeroext i64 @test_vcmpoeqps_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -48203,12 +41596,7 @@ define zeroext i64 @test_vcmpoeqps_v16i1
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -48236,86 +41624,15 @@ define zeroext i64 @test_vcmpoeqps_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -48324,12 +41641,7 @@ define zeroext i64 @test_vcmpoeqps_v16i1
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -48358,86 +41670,15 @@ define zeroext i64 @test_vcmpoeqps_v16i1
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -48446,12 +41687,7 @@ define zeroext i64 @test_vcmpoeqps_v16i1
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -48482,87 +41718,16 @@ define zeroext i64 @test_masked_vcmpoeqp
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -48571,12 +41736,7 @@ define zeroext i64 @test_masked_vcmpoeqp
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -48607,87 +41767,16 @@ define zeroext i64 @test_masked_vcmpoeqp
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -48696,12 +41785,7 @@ define zeroext i64 @test_masked_vcmpoeqp
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -48733,87 +41817,16 @@ define zeroext i64 @test_masked_vcmpoeqp
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k1 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -48822,12 +41835,7 @@ define zeroext i64 @test_masked_vcmpoeqp
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -52842,95 +45850,19 @@ define i32 @test_cmpm_rnd_zero(<16 x flo
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: pushq %r15
-; NoVLX-NEXT: pushq %r14
-; NoVLX-NEXT: pushq %r13
-; NoVLX-NEXT: pushq %r12
-; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .cfi_offset %r15, -24
-; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kxorw %k0, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kshiftlw $14, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r11d
-; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r14d
-; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r15d
-; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r12d
-; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %r13d
-; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ebx
-; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k1
+; NoVLX-NEXT: kxorw %k0, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: leaq -40(%rbp), %rsp
-; NoVLX-NEXT: popq %rbx
-; NoVLX-NEXT: popq %r12
-; NoVLX-NEXT: popq %r13
-; NoVLX-NEXT: popq %r14
-; NoVLX-NEXT: popq %r15
+; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/vector-compare-results.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-compare-results.ll?rev=320675&r1=320674&r2=320675&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-compare-results.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-compare-results.ll Thu Dec 14 00:26:00 2017
@@ -2383,134 +2383,13 @@ define <32 x i1> @test_cmp_v32f32(<32 x
;
; AVX512F-LABEL: test_cmp_v32f32:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vcmpltps %zmm1, %zmm3, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm1
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $8, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $7, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $6, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $5, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $4, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $3, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $2, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $1, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: vcmpltps %zmm0, %zmm2, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm0
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $8, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $7, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $6, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $5, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $4, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $3, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $2, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $1, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: vcmpltps %zmm0, %zmm2, %k1
+; AVX512F-NEXT: movl {{.*}}(%rip), %eax
+; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vcmpltps %zmm1, %zmm3, %k1
+; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
@@ -2520,134 +2399,13 @@ define <32 x i1> @test_cmp_v32f32(<32 x
;
; AVX512DQ-LABEL: test_cmp_v32f32:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vcmpltps %zmm1, %zmm3, %k0
-; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm1
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k0
-; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm0
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k1
+; AVX512DQ-NEXT: movl {{.*}}(%rip), %eax
+; AVX512DQ-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
+; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vcmpltps %zmm1, %zmm3, %k1
+; AVX512DQ-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
@@ -3134,134 +2892,13 @@ define <32 x i1> @test_cmp_v32i32(<32 x
;
; AVX512F-LABEL: test_cmp_v32i32:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpcmpgtd %zmm3, %zmm1, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm1
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $8, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $7, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $6, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $5, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $4, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $3, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $2, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftlw $1, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
-; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm0, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm0
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $8, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $7, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $6, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $5, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $4, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $3, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $2, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $1, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm0, %k1
+; AVX512F-NEXT: movl {{.*}}(%rip), %eax
+; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vpcmpgtd %zmm3, %zmm1, %k1
+; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
@@ -3271,134 +2908,13 @@ define <32 x i1> @test_cmp_v32i32(<32 x
;
; AVX512DQ-LABEL: test_cmp_v32i32:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpcmpgtd %zmm3, %zmm1, %k0
-; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm1
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpcmpgtd %zmm2, %zmm0, %k0
-; AVX512DQ-NEXT: kshiftlw $14, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: kshiftlw $15, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm0
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $13, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $12, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $11, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $10, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $9, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $8, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlw $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrw $15, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftrw $15, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; AVX512DQ-NEXT: vpcmpgtd %zmm2, %zmm0, %k1
+; AVX512DQ-NEXT: movl {{.*}}(%rip), %eax
+; AVX512DQ-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
+; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vpcmpgtd %zmm3, %zmm1, %k1
+; AVX512DQ-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
@@ -6446,279 +5962,41 @@ define <32 x i1> @test_cmp_v32f64(<32 x
;
; AVX512F-LABEL: test_cmp_v32f64:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vcmpltpd %zmm2, %zmm6, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm2
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $8, %k0, %k0
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: vcmpltpd %zmm3, %zmm7, %k0
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $8, %k0, %k0
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; AVX512F-NEXT: vcmpltpd %zmm0, %zmm4, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm0
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $8, %k0, %k0
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: vcmpltpd %zmm1, %zmm5, %k0
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $8, %k0, %k0
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX512F-NEXT: vcmpltpd %zmm1, %zmm5, %k1
+; AVX512F-NEXT: kunpckbw %k0, %k1, %k1
+; AVX512F-NEXT: movl {{.*}}(%rip), %eax
+; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vcmpltpd %zmm2, %zmm6, %k0
+; AVX512F-NEXT: vcmpltpd %zmm3, %zmm7, %k1
+; AVX512F-NEXT: kunpckbw %k0, %k1, %k1
+; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; AVX512F-NEXT: retq
;
; AVX512DQ-LABEL: test_cmp_v32f64:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vcmpltpd %zmm2, %zmm6, %k0
-; AVX512DQ-NEXT: kshiftlb $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: kshiftlb $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm2
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlb $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlb $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlb $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlb $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlb $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftrb $7, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: vcmpltpd %zmm3, %zmm7, %k0
-; AVX512DQ-NEXT: kshiftlb $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlb $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlb $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlb $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlb $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlb $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlb $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftrb $7, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm4, %k0
-; AVX512DQ-NEXT: kshiftlb $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: kshiftlb $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm0
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlb $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlb $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlb $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlb $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlb $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftrb $7, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm5, %k0
-; AVX512DQ-NEXT: kshiftlb $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlb $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlb $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlb $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlb $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlb $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlb $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftrb $7, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm5, %k1
+; AVX512DQ-NEXT: kunpckbw %k0, %k1, %k1
+; AVX512DQ-NEXT: movl {{.*}}(%rip), %eax
+; AVX512DQ-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
+; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vcmpltpd %zmm2, %zmm6, %k0
+; AVX512DQ-NEXT: vcmpltpd %zmm3, %zmm7, %k1
+; AVX512DQ-NEXT: kunpckbw %k0, %k1, %k1
+; AVX512DQ-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
+; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX512DQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; AVX512DQ-NEXT: retq
;
@@ -7307,139 +6585,18 @@ define <32 x i1> @test_cmp_v32i64(<32 x
;
; AVX512F-LABEL: test_cmp_v32i64:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpcmpgtq %zmm6, %zmm2, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm2
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $8, %k0, %k0
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: vpcmpgtq %zmm7, %zmm3, %k0
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; AVX512F-NEXT: kshiftlw $8, %k0, %k0
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpgtq %zmm4, %zmm0, %k0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %ecx
-; AVX512F-NEXT: vmovd %ecx, %xmm0
-; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $8, %k0, %k0
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: vpcmpgtq %zmm5, %zmm1, %k0
-; AVX512F-NEXT: kshiftlw $15, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $14, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $13, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $12, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $11, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $10, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $9, %k0, %k1
-; AVX512F-NEXT: kshiftrw $15, %k1, %k1
-; AVX512F-NEXT: kmovw %k1, %eax
-; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: kshiftlw $8, %k0, %k0
-; AVX512F-NEXT: kshiftrw $15, %k0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX512F-NEXT: vpcmpgtq %zmm5, %zmm1, %k1
+; AVX512F-NEXT: kunpckbw %k0, %k1, %k1
+; AVX512F-NEXT: movl {{.*}}(%rip), %eax
+; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vpcmpgtq %zmm6, %zmm2, %k0
+; AVX512F-NEXT: vpcmpgtq %zmm7, %zmm3, %k1
+; AVX512F-NEXT: kunpckbw %k0, %k1, %k1
+; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
@@ -7448,135 +6605,18 @@ define <32 x i1> @test_cmp_v32i64(<32 x
;
; AVX512DQ-LABEL: test_cmp_v32i64:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpcmpgtq %zmm6, %zmm2, %k0
-; AVX512DQ-NEXT: kshiftlb $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: kshiftlb $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm2
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlb $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlb $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlb $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlb $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlb $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftrb $7, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: vpcmpgtq %zmm7, %zmm3, %k0
-; AVX512DQ-NEXT: kshiftlb $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlb $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlb $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlb $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlb $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlb $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftlb $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; AVX512DQ-NEXT: kshiftrb $7, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; AVX512DQ-NEXT: vpcmpgtq %zmm4, %zmm0, %k0
-; AVX512DQ-NEXT: kshiftlb $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: kshiftlb $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %ecx
-; AVX512DQ-NEXT: vmovd %ecx, %xmm0
-; AVX512DQ-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlb $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlb $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlb $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlb $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlb $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftrb $7, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: vpcmpgtq %zmm5, %zmm1, %k0
-; AVX512DQ-NEXT: kshiftlb $7, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlb $6, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlb $5, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlb $4, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlb $3, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlb $2, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftlb $1, %k0, %k1
-; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1
-; AVX512DQ-NEXT: kmovw %k1, %eax
-; AVX512DQ-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: kshiftrb $7, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpcmpgtq %zmm5, %zmm1, %k1
+; AVX512DQ-NEXT: kunpckbw %k0, %k1, %k1
+; AVX512DQ-NEXT: movl {{.*}}(%rip), %eax
+; AVX512DQ-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
+; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vpcmpgtq %zmm6, %zmm2, %k0
+; AVX512DQ-NEXT: vpcmpgtq %zmm7, %zmm3, %k1
+; AVX512DQ-NEXT: kunpckbw %k0, %k1, %k1
+; AVX512DQ-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
+; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
More information about the llvm-commits
mailing list