[llvm] r325933 - [X86] Custom split v32i16/v64i8 bitcasts when AVX512F is available, but BWI is not.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 23 10:43:36 PST 2018
Author: ctopper
Date: Fri Feb 23 10:43:36 2018
New Revision: 325933
URL: http://llvm.org/viewvc/llvm-project?rev=325933&view=rev
Log:
[X86] Custom split v32i16/v64i8 bitcasts when AVX512F is available, but BWI is not.
The visible test changes come from the changes in ReplaceNodeResults, though shuffle-vs-trunc-512.ll does have a test that exercises the code in LowerBITCAST. That test's output didn't change because DAG combining is able to clean up the result of type legalization anyway; adding the custom hook just makes type legalization work less hard.
Differential Revision: https://reviews.llvm.org/D43447
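
As a concrete illustration of the operand-side case (a minimal sketch; the function name and llc invocation below are hypothetical illustrations, not taken from the tests mentioned above): with plain AVX512F the v32i16 source type is illegal, so type legalization hands this bitcast to LowerBITCAST.

; llc -mtriple=x86_64-unknown-unknown -mattr=+avx512f
; @cast_from_v32i16 is a hypothetical name for illustration, not a test
; from the tree.
define <16 x i32> @cast_from_v32i16(<32 x i16> %x) {
  %b = bitcast <32 x i16> %x to <16 x i32>
  ret <16 x i32> %b
}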
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=325933&r1=325932&r2=325933&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Feb 23 10:43:36 2018
@@ -1365,6 +1365,12 @@ X86TargetLowering::X86TargetLowering(con
setOperationPromotedToType(ISD::LOAD, VT, MVT::v8i64);
setOperationPromotedToType(ISD::SELECT, VT, MVT::v8i64);
}
+
+ // Need to custom split v32i16/v64i8 bitcasts.
+ if (!Subtarget.hasBWI()) {
+ setOperationAction(ISD::BITCAST, MVT::v32i16, Custom);
+ setOperationAction(ISD::BITCAST, MVT::v64i8, Custom);
+ }
}// has AVX-512
// This block controls legalization for operations that don't have
@@ -21779,8 +21785,9 @@ static SDValue LowerVectorIntUnary(SDVal
// Extract the Lo/Hi vectors
SDLoc dl(Op);
SDValue Src = Op.getOperand(0);
+ unsigned SrcNumElems = Src.getSimpleValueType().getVectorNumElements();
SDValue Lo = extractSubVector(Src, 0, DAG, dl, SizeInBits / 2);
- SDValue Hi = extractSubVector(Src, NumElems / 2, DAG, dl, SizeInBits / 2);
+ SDValue Hi = extractSubVector(Src, SrcNumElems / 2, DAG, dl, SizeInBits / 2);
MVT EltVT = VT.getVectorElementType();
MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2);
@@ -23745,6 +23752,10 @@ static SDValue LowerBITCAST(SDValue Op,
return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi);
}
+ // Custom splitting for BWI types when AVX512F is available but BWI isn't.
+ if ((SrcVT == MVT::v32i16 || SrcVT == MVT::v64i8) && DstVT.isVector())
+ return Lower512IntUnary(Op, DAG);
+
if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
SrcVT == MVT::i64) {
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
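
To see what the Lower512IntUnary call above does to such a bitcast, here is the equivalent transform written out at the IR level (an illustrative sketch with hypothetical names, not code from the patch): extract the two 256-bit halves of the source, bitcast each now-legal half, and concatenate the results. DAG combining can usually fold this split/concat back into a plain register reinterpretation, which is why the shuffle-vs-trunc-512.ll output mentioned in the log didn't change.

; Equivalent by-hand split of: %b = bitcast <32 x i16> %x to <16 x i32>
; (@split_by_hand is a hypothetical name for illustration.)
define <16 x i32> @split_by_hand(<32 x i16> %x) {
  ; extract the low and high <16 x i16> halves of the source
  %lo16 = shufflevector <32 x i16> %x, <32 x i16> undef,
    <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %hi16 = shufflevector <32 x i16> %x, <32 x i16> undef,
    <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
                i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ; bitcast each 256-bit half, which is legal without BWI
  %lo = bitcast <16 x i16> %lo16 to <8 x i32>
  %hi = bitcast <16 x i16> %hi16 to <8 x i32>
  ; concatenate the halves back into a 512-bit result
  %r = shufflevector <8 x i32> %lo, <8 x i32> %hi,
    <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i32> %r
}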
@@ -25132,6 +25143,14 @@ void X86TargetLowering::ReplaceNodeResul
Results.push_back(Res);
return;
}
+
+ // Custom splitting for BWI types when AVX512F is available but BWI isn't.
+ if ((DstVT == MVT::v32i16 || DstVT == MVT::v64i8) &&
+ SrcVT.isVector() && isTypeLegal(SrcVT)) {
+ SDValue Res = Lower512IntUnary(SDValue(N, 0), DAG);
+ Results.push_back(Res);
+ return;
+ }
if (SrcVT != MVT::f64 ||
(DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8))
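
The ReplaceNodeResults hunk covers the opposite direction, where the illegal 512-bit type is the result rather than the operand (again a hypothetical sketch, not one of the modified tests). Here the source v16i32 is legal with plain AVX512F, which is exactly what the isTypeLegal(SrcVT) guard checks before splitting:

; The result type is the illegal one, so type legalization routes this
; node through ReplaceNodeResults. (@cast_to_v32i16 is a hypothetical name.)
define <32 x i16> @cast_to_v32i16(<16 x i32> %x) {
  %b = bitcast <16 x i32> %x to <32 x i16>
  ret <32 x i16> %b
}

A bitcast between two illegal types, e.g. <64 x i8> to <32 x i16>, fails that guard and is left to the generic type legalizer.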
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll?rev=325933&r1=325932&r2=325933&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll Fri Feb 23 10:43:36 2018
@@ -926,182 +926,14 @@ define zeroext i64 @test_vpcmpeqw_v32i1_
;
; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm6, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm7, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm5, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm8, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: vmovq %xmm1, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
-; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
-; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3
+; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm0
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: shll $16, %eax
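
The trimmed NoVLX sequences above and below come from tests of roughly this shape (a sketch of the test style, not the verbatim test body; the function name is hypothetical). The <32 x i16> compare operands are now type-legalized with two vextracti64x4 half extracts instead of being rebuilt element by element through GPRs:

define zeroext i64 @vpcmpeqw_shape(<8 x i64> %a, <8 x i64> %b) {
entry:
  %va  = bitcast <8 x i64> %a to <32 x i16>
  %vb  = bitcast <8 x i64> %b to <32 x i16>
  %cmp = icmp eq <32 x i16> %va, %vb      ; split into two ymm vpcmpeqw
  %m   = bitcast <32 x i1> %cmp to i32    ; pack the mask bits
  %res = zext i32 %m to i64
  ret i64 %res
}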
@@ -1127,91 +959,7 @@ define zeroext i64 @test_vpcmpeqw_v32i1_
;
; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -1245,190 +993,22 @@ define zeroext i64 @test_masked_vpcmpeqw
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm9
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm3
-; NoVLX-NEXT: vmovq %xmm3, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm5
-; NoVLX-NEXT: vmovq %xmm9, %rax
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm4
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm2, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm10
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm6, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm5
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm6
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm0
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm7, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm8
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm7
-; NoVLX-NEXT: movl %eax, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm9, %rdx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %edx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rdx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm3, %xmm1
-; NoVLX-NEXT: movl %edx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rdx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm1, %xmm1
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm8, %ymm3
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm7, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm10, %ymm5, %ymm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; NoVLX-NEXT: vpcmpeqw %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: andl %edi, %edx
-; NoVLX-NEXT: shll $16, %edx
-; NoVLX-NEXT: movzwl %cx, %eax
-; NoVLX-NEXT: orl %edx, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: shll $16, %ecx
+; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -1453,97 +1033,13 @@ define zeroext i64 @test_masked_vpcmpeqw
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm0
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm1
-; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
-; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -6319,182 +5815,14 @@ define zeroext i64 @test_vpcmpsgtw_v32i1
;
; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm6, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm7, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm5, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm8, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: vmovq %xmm1, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
-; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
-; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm0
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: shll $16, %eax
@@ -6520,91 +5848,7 @@ define zeroext i64 @test_vpcmpsgtw_v32i1
;
; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -6638,190 +5882,22 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm9
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm3
-; NoVLX-NEXT: vmovq %xmm3, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm5
-; NoVLX-NEXT: vmovq %xmm9, %rax
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm4
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm2, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm10
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm6, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm5
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm6
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm0
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm7, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm8
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm7
-; NoVLX-NEXT: movl %eax, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm9, %rdx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %edx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rdx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm3, %xmm1
-; NoVLX-NEXT: movl %edx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rdx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm1, %xmm1
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm8, %ymm3
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm7, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm10, %ymm5, %ymm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: andl %edi, %edx
-; NoVLX-NEXT: shll $16, %edx
-; NoVLX-NEXT: movzwl %cx, %eax
-; NoVLX-NEXT: orl %edx, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: shll $16, %ecx
+; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -6846,97 +5922,13 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm0
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm1
-; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
-; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -11796,185 +10788,17 @@ define zeroext i64 @test_vpcmpsgew_v32i1
;
; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm6, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm7, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm5, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm8, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: vmovq %xmm1, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
-; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3
+; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm0
+; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -12001,95 +10825,11 @@ define zeroext i64 @test_vpcmpsgew_v32i1
;
; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; NoVLX-NEXT: vmovdqa (%rdi), %ymm2
+; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm3
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
-; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
@@ -12125,194 +10865,26 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm9
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm3
-; NoVLX-NEXT: vmovq %xmm3, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vmovq %xmm9, %rax
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm2, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm10
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm6, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm4
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm6
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm0
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm7, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm8
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm5, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm7
-; NoVLX-NEXT: movl %eax, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm9, %rdx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %edx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rdx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm3, %xmm1
-; NoVLX-NEXT: movl %edx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rdx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm1, %xmm1
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm8, %ymm3
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
-; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm7, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm2
+; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm10, %ymm4, %ymm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: andl %edi, %edx
-; NoVLX-NEXT: shll $16, %edx
-; NoVLX-NEXT: movzwl %cx, %eax
-; NoVLX-NEXT: orl %edx, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: shll $16, %ecx
+; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12337,100 +10909,16 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm0
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm1
-; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
-; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm1
+; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
@@ -17309,185 +15797,17 @@ define zeroext i64 @test_vpcmpultw_v32i1
;
; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm6, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm7, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm5, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm8, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: vmovq %xmm1, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
-; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
-; NoVLX-NEXT: vpmaxuw %ymm2, %ymm0, %ymm2
-; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3
+; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: vpmaxuw %ymm3, %ymm1, %ymm0
-; NoVLX-NEXT: vpcmpeqw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmaxuw %ymm3, %ymm2, %ymm0
+; NoVLX-NEXT: vpcmpeqw %ymm0, %ymm2, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
@@ -17516,91 +15836,7 @@ define zeroext i64 @test_vpcmpultw_v32i1
;
; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; NoVLX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm2
; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
@@ -17640,196 +15876,28 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm9
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm3
-; NoVLX-NEXT: vmovq %xmm3, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vmovq %xmm9, %rax
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm2, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm10
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm6, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm4
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm6
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm0
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm7, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm8
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm5, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm7
-; NoVLX-NEXT: movl %eax, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm9, %rdx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %edx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rdx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm3, %xmm1
-; NoVLX-NEXT: movl %edx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rdx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm1, %xmm1
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm8, %ymm3
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; NoVLX-NEXT: vpmaxuw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm7, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2
+; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
+; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm10, %ymm4, %ymm1
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0
-; NoVLX-NEXT: vpmaxuw %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT: vpcmpeqw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: andl %edi, %edx
-; NoVLX-NEXT: shll $16, %edx
-; NoVLX-NEXT: movzwl %cx, %eax
-; NoVLX-NEXT: orl %edx, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: shll $16, %ecx
+; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17854,100 +15922,16 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm0
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmaxuw (%rsi), %ymm0, %ymm1
-; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm1
-; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
-; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm1
+; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vpmaxuw 32(%rsi), %ymm0, %ymm1
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0