[llvm] r324184 - [X86] Add DAG combine to turn (bitcast (and/or/xor (bitcast X), Y)) -> (and/or/xor X, (bitcast Y)) when casting between GPRs and mask operations.
Craig Topper via llvm-commits
llvm-commits@lists.llvm.org
Sat Feb 3 17:43:48 PST 2018
Author: ctopper
Date: Sat Feb 3 17:43:48 2018
New Revision: 324184
URL: http://llvm.org/viewvc/llvm-project?rev=324184&view=rev
Log:
[X86] Add DAG combine to turn (bitcast (and/or/xor (bitcast X), Y)) -> (and/or/xor X, (bitcast Y)) when casting between GPRs and mask operations.
This reduces the number of transitions between k-registers and GPRs, which in turn reduces the instruction count.
There's still room to remove more transitions, but this is a good start.
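For example, the mask16 test in avx512-mask-op.ll (updated below) is a minimal instance of the pattern:

  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <16 x i1> %m1 to i16

This used to round-trip through a mask register (kmovw %edi, %k0 / knotw %k0, %k0 / kmovw %k0, %eax); with the combine the xor is rewritten to operate directly on the i16, so it now compiles to notl %edi / movl %edi, %eax on all subtargets.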
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512bw-mask-op.ll
llvm/trunk/test/CodeGen/X86/avx512bw-vec-test-testn.ll
llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512bwvl-vec-test-testn.ll
llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll
llvm/trunk/test/CodeGen/X86/avx512f-vec-test-testn.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
llvm/trunk/test/CodeGen/X86/avx512vl-vec-test-testn.ll
llvm/trunk/test/CodeGen/X86/combine-testm-and.ll
llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=324184&r1=324183&r2=324184&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Feb 3 17:43:48 2018
@@ -30466,6 +30466,54 @@ static SDValue combineBitcastvxi1(Select
return DAG.getZExtOrTrunc(V, DL, VT);
}
+static SDValue combineCastedMaskArithmetic(SDNode *N, SelectionDAG &DAG,
+                                           TargetLowering::DAGCombinerInfo &DCI,
+                                           const X86Subtarget &Subtarget) {
+  assert(N->getOpcode() == ISD::BITCAST && "Expected a bitcast");
+
+  if (!DCI.isBeforeLegalizeOps())
+    return SDValue();
+
+  // Only do this if we have k-registers.
+  if (!Subtarget.hasAVX512())
+    return SDValue();
+
+  EVT DstVT = N->getValueType(0);
+  SDValue Op = N->getOperand(0);
+  EVT SrcVT = Op.getValueType();
+
+  if (!Op.hasOneUse())
+    return SDValue();
+
+  // Look for logic ops.
+  if (Op.getOpcode() != ISD::AND &&
+      Op.getOpcode() != ISD::OR &&
+      Op.getOpcode() != ISD::XOR)
+    return SDValue();
+
+  // Make sure we have a bitcast between mask registers and a scalar type.
+  if (!(SrcVT.isVector() && SrcVT.getVectorElementType() == MVT::i1 &&
+        DstVT.isScalarInteger()) &&
+      !(DstVT.isVector() && DstVT.getVectorElementType() == MVT::i1 &&
+        SrcVT.isScalarInteger()))
+    return SDValue();
+
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+
+  if (LHS.hasOneUse() && LHS.getOpcode() == ISD::BITCAST &&
+      LHS.getOperand(0).getValueType() == DstVT)
+    return DAG.getNode(Op.getOpcode(), SDLoc(N), DstVT, LHS.getOperand(0),
+                       DAG.getBitcast(DstVT, RHS));
+
+  if (RHS.hasOneUse() && RHS.getOpcode() == ISD::BITCAST &&
+      RHS.getOperand(0).getValueType() == DstVT)
+    return DAG.getNode(Op.getOpcode(), SDLoc(N), DstVT,
+                       DAG.getBitcast(DstVT, LHS), RHS.getOperand(0));
+
+  return SDValue();
+}
+
static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
                              TargetLowering::DAGCombinerInfo &DCI,
                              const X86Subtarget &Subtarget) {
@@ -30551,6 +30599,11 @@ static SDValue combineBitcast(SDNode *N,
}
}
+  // Try to remove bitcasts from input and output of mask arithmetic to
+  // remove GPR<->K-register crossings.
+  if (SDValue V = combineCastedMaskArithmetic(N, DAG, DCI, Subtarget))
+    return V;
+
// Convert a bitcasted integer logic operation that has one bitcasted
// floating-point operand into a floating-point logic operation. This may
// create a load of a constant, but that is cheaper than materializing the
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll?rev=324184&r1=324183&r2=324184&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll Sat Feb 3 17:43:48 2018
@@ -568,9 +568,9 @@ define i16 @test_pcmpeq_d(<16 x i32> %a,
define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_d:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
@@ -593,9 +593,9 @@ define i8 @test_pcmpeq_q(<8 x i64> %a, <
define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_q:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: andb %dil, %al
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
@@ -618,9 +618,9 @@ define i16 @test_pcmpgt_d(<16 x i32> %a,
define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_d:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
@@ -643,9 +643,9 @@ define i8 @test_pcmpgt_q(<8 x i64> %a, <
define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_q:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: andb %dil, %al
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
@@ -3687,11 +3687,9 @@ define i8 @test_vptestmq(<8 x i64> %a0,
; CHECK-LABEL: test_vptestmq:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovw %k0, %ecx
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: addb %cl, %al
+; CHECK-NEXT: andb %al, %dil
+; CHECK-NEXT: addb %dil, %al
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
@@ -3705,11 +3703,9 @@ define i16 @test_vptestmd(<16 x i32> %a0
; CHECK-LABEL: test_vptestmd:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovw %k0, %ecx
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: andl %eax, %edi
+; CHECK-NEXT: addl %edi, %eax
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
@@ -3725,11 +3721,9 @@ define i16@test_int_x86_avx512_ptestnm_d
; CHECK-LABEL: test_int_x86_avx512_ptestnm_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vptestnmd %zmm1, %zmm0, %k1 {%k1}
-; CHECK-NEXT: kmovw %k1, %ecx
; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: andl %eax, %edi
+; CHECK-NEXT: addl %edi, %eax
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
@@ -3744,11 +3738,9 @@ define i8@test_int_x86_avx512_ptestnm_q_
; CHECK-LABEL: test_int_x86_avx512_ptestnm_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmq %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vptestnmq %zmm1, %zmm0, %k1 {%k1}
-; CHECK-NEXT: kmovw %k1, %ecx
; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: addb %cl, %al
+; CHECK-NEXT: andb %al, %dil
+; CHECK-NEXT: addb %dil, %al
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
@@ -3762,12 +3754,8 @@ define i16 @test_kand(i16 %a0, i16 %a1)
; CHECK-LABEL: test_kand:
; CHECK: ## %bb.0:
; CHECK-NEXT: andl %esi, %edi
-; CHECK-NEXT: kmovw %edi, %k0
-; CHECK-NEXT: movw $8, %ax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: kandw %k1, %k0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: andl $8, %edi
+; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
%t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
%t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
@@ -3782,9 +3770,9 @@ define i16 @test_kandn(i16 %a0, i16 %a1)
; CHECK-NEXT: movw $8, %ax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: kandnw %k1, %k0, %k0
-; CHECK-NEXT: kmovw %esi, %k1
-; CHECK-NEXT: kandnw %k1, %k0, %k0
+; CHECK-NEXT: knotw %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: andl %esi, %eax
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%t1 = call i16 @llvm.x86.avx512.kandn.w(i16 %a0, i16 8)
@@ -3796,10 +3784,8 @@ declare i16 @llvm.x86.avx512.knot.w(i16)
define i16 @test_knot(i16 %a0) {
; CHECK-LABEL: test_knot:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k0
-; CHECK-NEXT: knotw %k0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
ret i16 %res
@@ -3810,12 +3796,8 @@ define i16 @test_kor(i16 %a0, i16 %a1) {
; CHECK-LABEL: test_kor:
; CHECK: ## %bb.0:
; CHECK-NEXT: orl %esi, %edi
-; CHECK-NEXT: kmovw %edi, %k0
-; CHECK-NEXT: movw $8, %ax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: korw %k1, %k0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: orl $8, %edi
+; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
%t1 = call i16 @llvm.x86.avx512.kor.w(i16 %a0, i16 8)
%t2 = call i16 @llvm.x86.avx512.kor.w(i16 %t1, i16 %a1)
@@ -3829,12 +3811,8 @@ define i16 @test_kxnor(i16 %a0, i16 %a1)
; CHECK-LABEL: test_kxnor:
; CHECK: ## %bb.0:
; CHECK-NEXT: xorl %esi, %edi
-; CHECK-NEXT: kmovw %edi, %k0
-; CHECK-NEXT: movw $8, %ax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: kxorw %k1, %k0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: xorl $8, %edi
+; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
%t1 = call i16 @llvm.x86.avx512.kxnor.w(i16 %a0, i16 8)
%t2 = call i16 @llvm.x86.avx512.kxnor.w(i16 %t1, i16 %a1)
@@ -3846,12 +3824,8 @@ define i16 @test_kxor(i16 %a0, i16 %a1)
; CHECK-LABEL: test_kxor:
; CHECK: ## %bb.0:
; CHECK-NEXT: xorl %esi, %edi
-; CHECK-NEXT: kmovw %edi, %k0
-; CHECK-NEXT: movw $8, %ax
-; CHECK-NEXT: kmovw %eax, %k1
-; CHECK-NEXT: kxorw %k1, %k0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
+; CHECK-NEXT: xorl $8, %edi
+; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
%t1 = call i16 @llvm.x86.avx512.kxor.w(i16 %a0, i16 8)
%t2 = call i16 @llvm.x86.avx512.kxor.w(i16 %t1, i16 %a1)
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=324184&r1=324183&r2=324184&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Sat Feb 3 17:43:48 2018
@@ -6,37 +6,11 @@
define i16 @mask16(i16 %x) {
-; KNL-LABEL: mask16:
-; KNL: ## %bb.0:
-; KNL-NEXT: kmovw %edi, %k0
-; KNL-NEXT: knotw %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: ## kill: def $ax killed $ax killed $eax
-; KNL-NEXT: retq
-;
-; SKX-LABEL: mask16:
-; SKX: ## %bb.0:
-; SKX-NEXT: kmovd %edi, %k0
-; SKX-NEXT: knotw %k0, %k0
-; SKX-NEXT: kmovd %k0, %eax
-; SKX-NEXT: ## kill: def $ax killed $ax killed $eax
-; SKX-NEXT: retq
-;
-; AVX512BW-LABEL: mask16:
-; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: kmovd %edi, %k0
-; AVX512BW-NEXT: knotw %k0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: ## kill: def $ax killed $ax killed $eax
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: mask16:
-; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: kmovw %edi, %k0
-; AVX512DQ-NEXT: knotw %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: ## kill: def $ax killed $ax killed $eax
-; AVX512DQ-NEXT: retq
+; CHECK-LABEL: mask16:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
%m0 = bitcast i16 %x to <16 x i1>
%m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
%ret = bitcast <16 x i1> %m1 to i16
@@ -44,33 +18,11 @@ define i16 @mask16(i16 %x) {
}
define i32 @mask16_zext(i16 %x) {
-; KNL-LABEL: mask16_zext:
-; KNL: ## %bb.0:
-; KNL-NEXT: kmovw %edi, %k0
-; KNL-NEXT: knotw %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: retq
-;
-; SKX-LABEL: mask16_zext:
-; SKX: ## %bb.0:
-; SKX-NEXT: kmovd %edi, %k0
-; SKX-NEXT: knotw %k0, %k0
-; SKX-NEXT: kmovw %k0, %eax
-; SKX-NEXT: retq
-;
-; AVX512BW-LABEL: mask16_zext:
-; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: kmovd %edi, %k0
-; AVX512BW-NEXT: knotw %k0, %k0
-; AVX512BW-NEXT: kmovw %k0, %eax
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: mask16_zext:
-; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: kmovw %edi, %k0
-; AVX512DQ-NEXT: knotw %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: retq
+; CHECK-LABEL: mask16_zext:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: movzwl %di, %eax
+; CHECK-NEXT: retq
%m0 = bitcast i16 %x to <16 x i1>
%m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
%m2 = bitcast <16 x i1> %m1 to i16
@@ -81,33 +33,33 @@ define i32 @mask16_zext(i16 %x) {
define i8 @mask8(i8 %x) {
; KNL-LABEL: mask8:
; KNL: ## %bb.0:
-; KNL-NEXT: kmovw %edi, %k0
-; KNL-NEXT: knotw %k0, %k0
+; KNL-NEXT: kxnorw %k0, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: xorb %dil, %al
; KNL-NEXT: ## kill: def $al killed $al killed $eax
; KNL-NEXT: retq
;
; SKX-LABEL: mask8:
; SKX: ## %bb.0:
-; SKX-NEXT: kmovd %edi, %k0
-; SKX-NEXT: knotb %k0, %k0
+; SKX-NEXT: kxnorw %k0, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
+; SKX-NEXT: xorb %dil, %al
; SKX-NEXT: ## kill: def $al killed $al killed $eax
; SKX-NEXT: retq
;
; AVX512BW-LABEL: mask8:
; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: kmovd %edi, %k0
-; AVX512BW-NEXT: knotw %k0, %k0
+; AVX512BW-NEXT: kxnorw %k0, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: xorb %dil, %al
; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: mask8:
; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: kmovw %edi, %k0
-; AVX512DQ-NEXT: knotb %k0, %k0
+; AVX512DQ-NEXT: kxnorw %k0, %k0, %k0
; AVX512DQ-NEXT: kmovw %k0, %eax
+; AVX512DQ-NEXT: xorb %dil, %al
; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
; AVX512DQ-NEXT: retq
%m0 = bitcast i8 %x to <8 x i1>
@@ -119,32 +71,34 @@ define i8 @mask8(i8 %x) {
define i32 @mask8_zext(i8 %x) {
; KNL-LABEL: mask8_zext:
; KNL: ## %bb.0:
-; KNL-NEXT: kmovw %edi, %k0
-; KNL-NEXT: knotw %k0, %k0
+; KNL-NEXT: kxnorw %k0, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: xorb %dil, %al
; KNL-NEXT: movzbl %al, %eax
; KNL-NEXT: retq
;
; SKX-LABEL: mask8_zext:
; SKX: ## %bb.0:
-; SKX-NEXT: kmovd %edi, %k0
-; SKX-NEXT: knotb %k0, %k0
-; SKX-NEXT: kmovb %k0, %eax
+; SKX-NEXT: kxnorw %k0, %k0, %k0
+; SKX-NEXT: kmovd %k0, %eax
+; SKX-NEXT: xorb %dil, %al
+; SKX-NEXT: movzbl %al, %eax
; SKX-NEXT: retq
;
; AVX512BW-LABEL: mask8_zext:
; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: kmovd %edi, %k0
-; AVX512BW-NEXT: knotw %k0, %k0
+; AVX512BW-NEXT: kxnorw %k0, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: xorb %dil, %al
; AVX512BW-NEXT: movzbl %al, %eax
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: mask8_zext:
; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: kmovw %edi, %k0
-; AVX512DQ-NEXT: knotb %k0, %k0
-; AVX512DQ-NEXT: kmovb %k0, %eax
+; AVX512DQ-NEXT: kxnorw %k0, %k0, %k0
+; AVX512DQ-NEXT: kmovw %k0, %eax
+; AVX512DQ-NEXT: xorb %dil, %al
+; AVX512DQ-NEXT: movzbl %al, %eax
; AVX512DQ-NEXT: retq
%m0 = bitcast i8 %x to <8 x i1>
%m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
@@ -171,11 +125,9 @@ define void @mask16_mem(i16* %ptr) {
define void @mask8_mem(i8* %ptr) {
; KNL-LABEL: mask8_mem:
; KNL: ## %bb.0:
-; KNL-NEXT: movzbl (%rdi), %eax
-; KNL-NEXT: kmovw %eax, %k0
-; KNL-NEXT: knotw %k0, %k0
+; KNL-NEXT: kxnorw %k0, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: movb %al, (%rdi)
+; KNL-NEXT: xorb %al, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: mask8_mem:
@@ -187,11 +139,9 @@ define void @mask8_mem(i8* %ptr) {
;
; AVX512BW-LABEL: mask8_mem:
; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: movzbl (%rdi), %eax
-; AVX512BW-NEXT: kmovd %eax, %k0
-; AVX512BW-NEXT: knotw %k0, %k0
+; AVX512BW-NEXT: kxnorw %k0, %k0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: movb %al, (%rdi)
+; AVX512BW-NEXT: xorb %al, (%rdi)
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: mask8_mem:
Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=324184&r1=324183&r2=324184&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Sat Feb 3 17:43:48 2018
@@ -6663,18 +6663,14 @@ define <8 x double> @mov_test47(i8 * %ad
define i16 @mask16(i16 %x) {
; GENERIC-LABEL: mask16:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
+; GENERIC-NEXT: notl %edi # sched: [1:0.33]
+; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: mask16:
; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
-; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
-; SKX-NEXT: # kill: def $ax killed $ax killed $eax
+; SKX-NEXT: notl %edi # sched: [1:0.25]
+; SKX-NEXT: movl %edi, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%m0 = bitcast i16 %x to <16 x i1>
%m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
@@ -6685,16 +6681,15 @@ define i16 @mask16(i16 %x) {
define i32 @mask16_zext(i16 %x) {
; GENERIC-LABEL: mask16_zext:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: kmovw %k0, %eax # sched: [1:0.33]
+; GENERIC-NEXT: notl %edi # sched: [1:0.33]
+; GENERIC-NEXT: movzwl %di, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: mask16_zext:
; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
-; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovw %k0, %eax # sched: [3:1.00]
+; SKX-NEXT: movl $65535, %eax # imm = 0xFFFF
+; SKX-NEXT: # sched: [1:0.25]
+; SKX-NEXT: andnl %eax, %edi, %eax # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%m0 = bitcast i16 %x to <16 x i1>
%m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
@@ -6706,17 +6701,17 @@ define i32 @mask16_zext(i16 %x) {
define i8 @mask8(i8 %x) {
; GENERIC-LABEL: mask8:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kxnorw %k0, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
+; GENERIC-NEXT: xorb %dil, %al # sched: [1:0.33]
; GENERIC-NEXT: # kill: def $al killed $al killed $eax
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: mask8:
; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
-; SKX-NEXT: knotb %k0, %k0 # sched: [1:1.00]
+; SKX-NEXT: kxnorw %k0, %k0, %k0 # sched: [1:1.00]
; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
+; SKX-NEXT: xorb %dil, %al # sched: [1:0.25]
; SKX-NEXT: # kill: def $al killed $al killed $eax
; SKX-NEXT: retq # sched: [7:1.00]
%m0 = bitcast i8 %x to <8 x i1>
@@ -6728,16 +6723,18 @@ define i8 @mask8(i8 %x) {
define i32 @mask8_zext(i8 %x) {
; GENERIC-LABEL: mask8_zext:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: kmovb %k0, %eax # sched: [1:0.33]
+; GENERIC-NEXT: kxnorw %k0, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
+; GENERIC-NEXT: xorb %dil, %al # sched: [1:0.33]
+; GENERIC-NEXT: movzbl %al, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: mask8_zext:
; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
-; SKX-NEXT: knotb %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovb %k0, %eax # sched: [3:1.00]
+; SKX-NEXT: kxnorw %k0, %k0, %k0 # sched: [1:1.00]
+; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
+; SKX-NEXT: xorb %dil, %al # sched: [1:0.25]
+; SKX-NEXT: movzbl %al, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%m0 = bitcast i8 %x to <8 x i1>
%m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll?rev=324184&r1=324183&r2=324184&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll Sat Feb 3 17:43:48 2018
@@ -624,14 +624,12 @@ entry:
define i64 @test_mm512_mask_test_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X32-LABEL: test_mm512_mask_test_epi8_mask:
; X32: # %bb.0: # %entry
-; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
-; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; X32-NEXT: vptestmb %zmm0, %zmm1, %k2
-; X32-NEXT: kandd %k1, %k2, %k1
-; X32-NEXT: kmovd %k1, %eax
-; X32-NEXT: kshiftrq $32, %k2, %k1
-; X32-NEXT: kandd %k0, %k1, %k0
-; X32-NEXT: kmovd %k0, %edx
+; X32-NEXT: vptestmb %zmm0, %zmm1, %k0
+; X32-NEXT: kshiftrq $32, %k0, %k1
+; X32-NEXT: kmovd %k1, %edx
+; X32-NEXT: kmovd %k0, %eax
+; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: andl {{[0-9]+}}(%esp), %edx
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
@@ -727,14 +725,12 @@ entry:
define i64 @test_mm512_mask_testn_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X32-LABEL: test_mm512_mask_testn_epi8_mask:
; X32: # %bb.0: # %entry
-; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
-; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; X32-NEXT: vptestnmb %zmm0, %zmm1, %k2
-; X32-NEXT: kandd %k1, %k2, %k1
-; X32-NEXT: kmovd %k1, %eax
-; X32-NEXT: kshiftrq $32, %k2, %k1
-; X32-NEXT: kandd %k0, %k1, %k0
-; X32-NEXT: kmovd %k0, %edx
+; X32-NEXT: vptestnmb %zmm0, %zmm1, %k0
+; X32-NEXT: kshiftrq $32, %k0, %k1
+; X32-NEXT: kmovd %k1, %edx
+; X32-NEXT: kmovd %k0, %eax
+; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: andl {{[0-9]+}}(%esp), %edx
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll?rev=324184&r1=324183&r2=324184&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll Sat Feb 3 17:43:48 2018
@@ -402,19 +402,20 @@ define i64 @test_pcmpeq_b(<64 x i8> %a,
define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
; AVX512BW-LABEL: test_mask_pcmpeq_b:
; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: kmovq %rdi, %k1
-; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: andq %rdi, %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_pcmpeq_b:
; AVX512F-32: # %bb.0:
-; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kshiftrq $32, %k0, %k1
-; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: kmovd %k1, %edx
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: andl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: andl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vzeroupper
; AVX512F-32-NEXT: retl
%res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
@@ -444,17 +445,17 @@ define i32 @test_pcmpeq_w(<32 x i16> %a,
define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_pcmpeq_w:
; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: kmovd %edi, %k1
-; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: andl %edi, %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_pcmpeq_w:
; AVX512F-32: # %bb.0:
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: andl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: vzeroupper
; AVX512F-32-NEXT: retl
%res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
@@ -486,19 +487,20 @@ define i64 @test_pcmpgt_b(<64 x i8> %a,
define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
; AVX512BW-LABEL: test_mask_pcmpgt_b:
; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: kmovq %rdi, %k1
-; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: andq %rdi, %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_pcmpgt_b:
; AVX512F-32: # %bb.0:
-; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kshiftrq $32, %k0, %k1
-; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: kmovd %k1, %edx
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: andl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: andl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vzeroupper
; AVX512F-32-NEXT: retl
%res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
@@ -528,17 +530,17 @@ define i32 @test_pcmpgt_w(<32 x i16> %a,
define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_pcmpgt_w:
; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: kmovd %edi, %k1
-; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: andl %edi, %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_pcmpgt_w:
; AVX512F-32: # %bb.0:
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: andl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: vzeroupper
; AVX512F-32-NEXT: retl
%res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
@@ -2480,11 +2482,9 @@ define i64@test_int_x86_avx512_ptestm_b_
; AVX512BW-LABEL: test_int_x86_avx512_ptestm_b_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vptestmb %zmm1, %zmm0, %k0
-; AVX512BW-NEXT: kmovq %rdi, %k1
-; AVX512BW-NEXT: vptestmb %zmm1, %zmm0, %k1 {%k1}
-; AVX512BW-NEXT: kmovq %k1, %rcx
; AVX512BW-NEXT: kmovq %k0, %rax
-; AVX512BW-NEXT: addq %rcx, %rax
+; AVX512BW-NEXT: andq %rax, %rdi
+; AVX512BW-NEXT: addq %rdi, %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
@@ -2494,14 +2494,13 @@ define i64@test_int_x86_avx512_ptestm_b_
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: .cfi_offset %esi, -8
; AVX512F-32-NEXT: vptestmb %zmm1, %zmm0, %k0
-; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: vptestmb %zmm1, %zmm0, %k1 {%k1}
-; AVX512F-32-NEXT: kshiftrq $32, %k1, %k2
-; AVX512F-32-NEXT: kmovd %k2, %ecx
-; AVX512F-32-NEXT: kmovd %k1, %esi
; AVX512F-32-NEXT: kshiftrq $32, %k0, %k1
-; AVX512F-32-NEXT: kmovd %k1, %edx
-; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: kmovd %k1, %ecx
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: andl %ecx, %edx
+; AVX512F-32-NEXT: kmovd %k0, %esi
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: andl %esi, %eax
; AVX512F-32-NEXT: addl %esi, %eax
; AVX512F-32-NEXT: adcl %ecx, %edx
; AVX512F-32-NEXT: popl %esi
@@ -2519,21 +2518,18 @@ define i32@test_int_x86_avx512_ptestm_w_
; AVX512BW-LABEL: test_int_x86_avx512_ptestm_w_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vptestmw %zmm1, %zmm0, %k0
-; AVX512BW-NEXT: kmovd %edi, %k1
-; AVX512BW-NEXT: vptestmw %zmm1, %zmm0, %k1 {%k1}
-; AVX512BW-NEXT: kmovd %k1, %ecx
; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: addl %ecx, %eax
+; AVX512BW-NEXT: andl %eax, %edi
+; AVX512BW-NEXT: addl %edi, %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_ptestm_w_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vptestmw %zmm1, %zmm0, %k0
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: vptestmw %zmm1, %zmm0, %k1 {%k1}
-; AVX512F-32-NEXT: kmovd %k1, %ecx
-; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: kmovd %k0, %ecx
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: andl %ecx, %eax
; AVX512F-32-NEXT: addl %ecx, %eax
; AVX512F-32-NEXT: vzeroupper
; AVX512F-32-NEXT: retl
@@ -2549,11 +2545,9 @@ define i64@test_int_x86_avx512_ptestnm_b
; AVX512BW-LABEL: test_int_x86_avx512_ptestnm_b_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vptestnmb %zmm1, %zmm0, %k0
-; AVX512BW-NEXT: kmovq %rdi, %k1
-; AVX512BW-NEXT: vptestnmb %zmm1, %zmm0, %k1 {%k1}
-; AVX512BW-NEXT: kmovq %k1, %rcx
; AVX512BW-NEXT: kmovq %k0, %rax
-; AVX512BW-NEXT: addq %rcx, %rax
+; AVX512BW-NEXT: andq %rax, %rdi
+; AVX512BW-NEXT: addq %rdi, %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
@@ -2563,14 +2557,13 @@ define i64@test_int_x86_avx512_ptestnm_b
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: .cfi_offset %esi, -8
; AVX512F-32-NEXT: vptestnmb %zmm1, %zmm0, %k0
-; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: vptestnmb %zmm1, %zmm0, %k1 {%k1}
-; AVX512F-32-NEXT: kshiftrq $32, %k1, %k2
-; AVX512F-32-NEXT: kmovd %k2, %ecx
-; AVX512F-32-NEXT: kmovd %k1, %esi
; AVX512F-32-NEXT: kshiftrq $32, %k0, %k1
-; AVX512F-32-NEXT: kmovd %k1, %edx
-; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: kmovd %k1, %ecx
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: andl %ecx, %edx
+; AVX512F-32-NEXT: kmovd %k0, %esi
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: andl %esi, %eax
; AVX512F-32-NEXT: addl %esi, %eax
; AVX512F-32-NEXT: adcl %ecx, %edx
; AVX512F-32-NEXT: popl %esi
@@ -2588,21 +2581,18 @@ define i32@test_int_x86_avx512_ptestnm_w
; AVX512BW-LABEL: test_int_x86_avx512_ptestnm_w_512:
; AVX512BW: ## %bb.0:
; AVX512BW-NEXT: vptestnmw %zmm1, %zmm0, %k0
-; AVX512BW-NEXT: kmovd %edi, %k1
-; AVX512BW-NEXT: vptestnmw %zmm1, %zmm0, %k1 {%k1}
-; AVX512BW-NEXT: kmovd %k1, %ecx
; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: addl %ecx, %eax
+; AVX512BW-NEXT: andl %eax, %edi
+; AVX512BW-NEXT: addl %edi, %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_ptestnm_w_512:
; AVX512F-32: # %bb.0:
; AVX512F-32-NEXT: vptestnmw %zmm1, %zmm0, %k0
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: vptestnmw %zmm1, %zmm0, %k1 {%k1}
-; AVX512F-32-NEXT: kmovd %k1, %ecx
-; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: kmovd %k0, %ecx
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: andl %ecx, %eax
; AVX512F-32-NEXT: addl %ecx, %eax
; AVX512F-32-NEXT: vzeroupper
; AVX512F-32-NEXT: retl
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-mask-op.ll?rev=324184&r1=324183&r2=324184&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-mask-op.ll Sat Feb 3 17:43:48 2018
@@ -4,9 +4,9 @@
define i32 @mask32(i32 %x) {
; CHECK-LABEL: mask32:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %edi, %k0
-; CHECK-NEXT: knotd %k0, %k0
+; CHECK-NEXT: kxnord %k0, %k0, %k0
; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: xorl %edi, %eax
; CHECK-NEXT: retq
%m0 = bitcast i32 %x to <32 x i1>
%m1 = xor <32 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
@@ -20,9 +20,9 @@ define i32 @mask32(i32 %x) {
define i64 @mask64(i64 %x) {
; CHECK-LABEL: mask64:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovq %rdi, %k0
-; CHECK-NEXT: knotq %k0, %k0
+; CHECK-NEXT: kxnorq %k0, %k0, %k0
; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: xorq %rdi, %rax
; CHECK-NEXT: retq
%m0 = bitcast i64 %x to <64 x i1>
%m1 = xor <64 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-vec-test-testn.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-vec-test-testn.ll?rev=324184&r1=324183&r2=324184&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-vec-test-testn.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-vec-test-testn.ll Sat Feb 3 17:43:48 2018
@@ -38,9 +38,9 @@ entry:
define zeroext i32 @TEST_mm512_mask_test_epi16_mask(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm512_mask_test_epi16_mask:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vptestmw %zmm0, %zmm1, %k0 {%k1}
+; CHECK-NEXT: vptestmw %zmm0, %zmm1, %k0
; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
entry:
@@ -57,9 +57,9 @@ entry:
define zeroext i64 @TEST_mm512_mask_test_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm512_mask_test_epi8_mask:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: kmovq %rdi, %k1
-; CHECK-NEXT: vptestmb %zmm0, %zmm1, %k0 {%k1}
+; CHECK-NEXT: vptestmb %zmm0, %zmm1, %k0
; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: andq %rdi, %rax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
entry:
@@ -109,9 +109,9 @@ entry:
define zeroext i32 @TEST_mm512_mask_testn_epi16_mask(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm512_mask_testn_epi16_mask:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vptestnmw %zmm0, %zmm1, %k0 {%k1}
+; CHECK-NEXT: vptestnmw %zmm0, %zmm1, %k0
; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
entry:
@@ -128,9 +128,9 @@ entry:
define zeroext i64 @TEST_mm512_mask_testn_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm512_mask_testn_epi8_mask:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: kmovq %rdi, %k1
-; CHECK-NEXT: vptestnmb %zmm0, %zmm1, %k0 {%k1}
+; CHECK-NEXT: vptestnmb %zmm0, %zmm1, %k0
; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: andq %rdi, %rax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
entry:
Modified: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll?rev=324184&r1=324183&r2=324184&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll Sat Feb 3 17:43:48 2018
@@ -487,9 +487,9 @@ define i32 @test_pcmpeq_b_256(<32 x i8>
define i32 @test_mask_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_b_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
+; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: andl %edi, %eax ## encoding: [0x21,0xf8]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
@@ -513,9 +513,9 @@ define i16 @test_pcmpeq_w_256(<16 x i16>
define i16 @test_mask_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_w_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
+; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: andl %edi, %eax ## encoding: [0x21,0xf8]
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -539,9 +539,9 @@ define i32 @test_pcmpgt_b_256(<32 x i8>
define i32 @test_mask_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_b_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
+; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: andl %edi, %eax ## encoding: [0x21,0xf8]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
@@ -565,9 +565,9 @@ define i16 @test_pcmpgt_w_256(<16 x i16>
define i16 @test_mask_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_w_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc1]
+; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: andl %edi, %eax ## encoding: [0x21,0xf8]
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -591,9 +591,9 @@ define i16 @test_pcmpeq_b_128(<16 x i8>
define i16 @test_mask_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_b_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1]
+; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: andl %edi, %eax ## encoding: [0x21,0xf8]
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
@@ -616,9 +616,9 @@ define i8 @test_pcmpeq_w_128(<8 x i16> %
define i8 @test_mask_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_w_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
+; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: andb %dil, %al ## encoding: [0x40,0x20,0xf8]
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
@@ -641,9 +641,9 @@ define i16 @test_pcmpgt_b_128(<16 x i8>
define i16 @test_mask_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_b_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x64,0xc1]
+; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: andl %edi, %eax ## encoding: [0x21,0xf8]
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
@@ -666,9 +666,9 @@ define i8 @test_pcmpgt_w_128(<8 x i16> %
define i8 @test_mask_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_w_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x65,0xc1]
+; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: andb %dil, %al ## encoding: [0x40,0x20,0xf8]
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
@@ -3678,11 +3678,9 @@ define i16@test_int_x86_avx512_ptestm_b_
; CHECK-LABEL: test_int_x86_avx512_ptestm_b_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestmb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x08,0x26,0xc1]
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vptestmb %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x26,0xc9]
-; CHECK-NEXT: kmovd %k1, %ecx ## encoding: [0xc5,0xfb,0x93,0xc9]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
+; CHECK-NEXT: andl %eax, %edi ## encoding: [0x21,0xc7]
+; CHECK-NEXT: addl %edi, %eax ## encoding: [0x01,0xf8]
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
@@ -3697,11 +3695,9 @@ define i32@test_int_x86_avx512_ptestm_b_
; CHECK-LABEL: test_int_x86_avx512_ptestm_b_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestmb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x28,0x26,0xc1]
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vptestmb %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x26,0xc9]
-; CHECK-NEXT: kmovd %k1, %ecx ## encoding: [0xc5,0xfb,0x93,0xc9]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
+; CHECK-NEXT: andl %eax, %edi ## encoding: [0x21,0xc7]
+; CHECK-NEXT: addl %edi, %eax ## encoding: [0x01,0xf8]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
@@ -3716,11 +3712,9 @@ define i8@test_int_x86_avx512_ptestm_w_1
; CHECK-LABEL: test_int_x86_avx512_ptestm_w_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestmw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1]
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vptestmw %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x26,0xc9]
-; CHECK-NEXT: kmovd %k1, %ecx ## encoding: [0xc5,0xfb,0x93,0xc9]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: andb %al, %dil ## encoding: [0x40,0x20,0xc7]
+; CHECK-NEXT: addb %dil, %al ## encoding: [0x40,0x00,0xf8]
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
@@ -3735,11 +3729,9 @@ define i16@test_int_x86_avx512_ptestm_w_
; CHECK-LABEL: test_int_x86_avx512_ptestm_w_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestmw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x26,0xc1]
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vptestmw %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x26,0xc9]
-; CHECK-NEXT: kmovd %k1, %ecx ## encoding: [0xc5,0xfb,0x93,0xc9]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
+; CHECK-NEXT: andl %eax, %edi ## encoding: [0x21,0xc7]
+; CHECK-NEXT: addl %edi, %eax ## encoding: [0x01,0xf8]
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -3755,11 +3747,9 @@ define i16@test_int_x86_avx512_ptestnm_b
; CHECK-LABEL: test_int_x86_avx512_ptestnm_b_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x26,0xc1]
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vptestnmb %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x26,0xc9]
-; CHECK-NEXT: kmovd %k1, %ecx ## encoding: [0xc5,0xfb,0x93,0xc9]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
+; CHECK-NEXT: andl %eax, %edi ## encoding: [0x21,0xc7]
+; CHECK-NEXT: addl %edi, %eax ## encoding: [0x01,0xf8]
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
@@ -3774,11 +3764,9 @@ define i32@test_int_x86_avx512_ptestnm_b
; CHECK-LABEL: test_int_x86_avx512_ptestnm_b_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x26,0xc1]
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vptestnmb %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x26,0xc9]
-; CHECK-NEXT: kmovd %k1, %ecx ## encoding: [0xc5,0xfb,0x93,0xc9]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
+; CHECK-NEXT: andl %eax, %edi ## encoding: [0x21,0xc7]
+; CHECK-NEXT: addl %edi, %eax ## encoding: [0x01,0xf8]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
@@ -3793,11 +3781,9 @@ define i8@test_int_x86_avx512_ptestnm_w_
; CHECK-LABEL: test_int_x86_avx512_ptestnm_w_128:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1]
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vptestnmw %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfe,0x09,0x26,0xc9]
-; CHECK-NEXT: kmovd %k1, %ecx ## encoding: [0xc5,0xfb,0x93,0xc9]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: andb %al, %dil ## encoding: [0x40,0x20,0xc7]
+; CHECK-NEXT: addb %dil, %al ## encoding: [0x40,0x00,0xf8]
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
@@ -3812,11 +3798,9 @@ define i16@test_int_x86_avx512_ptestnm_w
; CHECK-LABEL: test_int_x86_avx512_ptestnm_w_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x26,0xc1]
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vptestnmw %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfe,0x29,0x26,0xc9]
-; CHECK-NEXT: kmovd %k1, %ecx ## encoding: [0xc5,0xfb,0x93,0xc9]
; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
+; CHECK-NEXT: andl %eax, %edi ## encoding: [0x21,0xc7]
+; CHECK-NEXT: addl %edi, %eax ## encoding: [0x01,0xf8]
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retq ## encoding: [0xc3]
Modified: llvm/trunk/test/CodeGen/X86/avx512bwvl-vec-test-testn.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bwvl-vec-test-testn.ll?rev=324184&r1=324183&r2=324184&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-vec-test-testn.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-vec-test-testn.ll Sat Feb 3 17:43:48 2018
@@ -21,9 +21,9 @@ entry:
define zeroext i16 @TEST_mm_mask_test_epi8_mask(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm_mask_test_epi8_mask:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vptestmb %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: vptestmb %xmm0, %xmm1, %k0
; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
entry:
@@ -56,9 +56,9 @@ entry:
define zeroext i8 @TEST_mm_mask_test_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm_mask_test_epi16_mask:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vptestmw %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: vptestmw %xmm0, %xmm1, %k0
; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: andb %dil, %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
entry:
@@ -91,9 +91,9 @@ entry:
define zeroext i16 @TEST_mm_mask_testn_epi8_mask(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm_mask_testn_epi8_mask:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vptestnmb %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: vptestnmb %xmm0, %xmm1, %k0
; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
entry:
@@ -126,9 +126,9 @@ entry:
define zeroext i8 @TEST_mm_mask_testn_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm_mask_testn_epi16_mask:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vptestnmw %xmm0, %xmm1, %k0 {%k1}
+; CHECK-NEXT: vptestnmw %xmm0, %xmm1, %k0
; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: andb %dil, %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
entry:
@@ -161,9 +161,9 @@ entry:
define i32 @TEST_mm256_mask_test_epi8_mask(i32 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm256_mask_test_epi8_mask:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vptestmb %ymm0, %ymm1, %k0 {%k1}
+; CHECK-NEXT: vptestmb %ymm0, %ymm1, %k0
; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
entry:
@@ -197,9 +197,9 @@ entry:
define zeroext i16 @TEST_mm256_mask_test_epi16_mask(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm256_mask_test_epi16_mask:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vptestmw %ymm0, %ymm1, %k0 {%k1}
+; CHECK-NEXT: vptestmw %ymm0, %ymm1, %k0
; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
@@ -233,9 +233,9 @@ entry:
define i32 @TEST_mm256_mask_testn_epi8_mask(i32 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm256_mask_testn_epi8_mask:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vptestnmb %ymm0, %ymm1, %k0 {%k1}
+; CHECK-NEXT: vptestnmb %ymm0, %ymm1, %k0
; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
entry:
@@ -269,9 +269,9 @@ entry:
define zeroext i16 @TEST_mm256_mask_testn_epi16_mask(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm256_mask_testn_epi16_mask:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vptestnmw %ymm0, %ymm1, %k0 {%k1}
+; CHECK-NEXT: vptestnmw %ymm0, %ymm1, %k0
; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll?rev=324184&r1=324183&r2=324184&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll Sat Feb 3 17:43:48 2018
@@ -4,9 +4,9 @@
define i8 @mask8(i8 %x) {
; CHECK-LABEL: mask8:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %edi, %k0
-; CHECK-NEXT: knotb %k0, %k0
+; CHECK-NEXT: kxnorw %k0, %k0, %k0
; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: xorb %dil, %al
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%m0 = bitcast i8 %x to <8 x i1>
Modified: llvm/trunk/test/CodeGen/X86/avx512f-vec-test-testn.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512f-vec-test-testn.ll?rev=324184&r1=324183&r2=324184&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512f-vec-test-testn.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512f-vec-test-testn.ll Sat Feb 3 17:43:48 2018
@@ -38,9 +38,9 @@ entry:
define zeroext i8 @TEST_mm512_mask_test_epi64_mask(i8 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm512_mask_test_epi64_mask:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vptestmq %zmm0, %zmm1, %k0 {%k1}
+; CHECK-NEXT: vptestmq %zmm0, %zmm1, %k0
; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: andb %dil, %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
@@ -57,9 +57,9 @@ entry:
define zeroext i16 @TEST_mm512_mask_test_epi32_mask(i16 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm512_mask_test_epi32_mask:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vptestmd %zmm0, %zmm1, %k0 {%k1}
+; CHECK-NEXT: vptestmd %zmm0, %zmm1, %k0
; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
@@ -110,9 +110,9 @@ entry:
define zeroext i8 @TEST_mm512_mask_testn_epi64_mask(i8 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm512_mask_testn_epi64_mask:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vptestnmq %zmm0, %zmm1, %k0 {%k1}
+; CHECK-NEXT: vptestnmq %zmm0, %zmm1, %k0
; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: andb %dil, %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
@@ -129,9 +129,9 @@ entry:
define zeroext i16 @TEST_mm512_mask_testn_epi32_mask(i16 %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; CHECK-LABEL: TEST_mm512_mask_testn_epi32_mask:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vptestnmd %zmm0, %zmm1, %k0 {%k1}
+; CHECK-NEXT: vptestnmd %zmm0, %zmm1, %k0
; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll?rev=324184&r1=324183&r2=324184&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll Sat Feb 3 17:43:48 2018
@@ -1073,9 +1073,9 @@ define i8 @test_pcmpeq_d_256(<8 x i32> %
define i8 @test_mask_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_d_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
+; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: andb %dil, %al ## encoding: [0x40,0x20,0xf8]
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
@@ -1123,9 +1123,9 @@ define i8 @test_pcmpgt_d_256(<8 x i32> %
define i8 @test_mask_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_d_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc1]
+; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: andb %dil, %al ## encoding: [0x40,0x20,0xf8]
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
@@ -6013,11 +6013,9 @@ define i8@test_int_x86_avx512_ptestm_d_2
; CHECK-LABEL: test_int_x86_avx512_ptestm_d_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestmd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc1]
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vptestmd %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x27,0xc9]
-; CHECK-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: andb %al, %dil ## encoding: [0x40,0x20,0xc7]
+; CHECK-NEXT: addb %dil, %al ## encoding: [0x40,0x00,0xf8]
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
@@ -6089,11 +6087,9 @@ define i8@test_int_x86_avx512_ptestnm_d_
; CHECK-LABEL: test_int_x86_avx512_ptestnm_d_256:
; CHECK: ## %bb.0:
; CHECK-NEXT: vptestnmd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc1]
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vptestnmd %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x27,0xc9]
-; CHECK-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: andb %al, %dil ## encoding: [0x40,0x20,0xc7]
+; CHECK-NEXT: addb %dil, %al ## encoding: [0x40,0x00,0xf8]
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
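
The ptestm/ptestnm intrinsic tests sum a masked and an unmasked result, so
once the masked call becomes a plain test plus a GPR and, one of the two
vptestmd instructions is redundant and a single k0 result feeds both the
andb and the addb above. Roughly (a sketch; the intrinsic signature matches
the call visible above, the second call and the add are reconstructed):

declare i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32>, <8 x i32>, i8)

define i8 @ptestm_sketch(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
  ; masked result...
  %res = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
  ; ...plus the all-ones-mask result
  %res1 = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}
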
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll?rev=324184&r1=324183&r2=324184&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll Sat Feb 3 17:43:48 2018
@@ -63,9 +63,9 @@ define zeroext i32 @test_masked_vpcmpeqb
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -91,9 +91,9 @@ define zeroext i32 @test_masked_vpcmpeqb
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -172,10 +172,9 @@ define zeroext i64 @test_masked_vpcmpeqb
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -201,10 +200,9 @@ define zeroext i64 @test_masked_vpcmpeqb
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -294,18 +292,19 @@ define zeroext i64 @test_masked_vpcmpeqb
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: shrl $16, %edi
; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
+; NoVLX-NEXT: shrl $16, %edi
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k2
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: shll $16, %ecx
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -331,18 +330,19 @@ define zeroext i64 @test_masked_vpcmpeqb
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: shrl $16, %edi
; NoVLX-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
+; NoVLX-NEXT: shrl $16, %edi
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k2
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: shll $16, %ecx
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -756,9 +756,9 @@ define zeroext i32 @test_masked_vpcmpeqw
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -785,9 +785,9 @@ define zeroext i32 @test_masked_vpcmpeqw
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -869,10 +869,9 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -899,10 +898,9 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -1247,189 +1245,190 @@ define zeroext i64 @test_masked_vpcmpeqw
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm9
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: vmovd %eax, %xmm4
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm5
+; NoVLX-NEXT: vmovq %xmm9, %rax
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm4
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rdx
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: movl %edx, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rdx, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm2, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm10
+; NoVLX-NEXT: movl %ecx, %edx
+; NoVLX-NEXT: shrl $16, %edx
; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm5, %xmm5
+; NoVLX-NEXT: movq %rcx, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: movl %edx, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rdx, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm7, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovq %xmm6, %rcx
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm5
+; NoVLX-NEXT: movl %ecx, %edx
+; NoVLX-NEXT: shrl $16, %edx
; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rdx
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: movl %edx, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rdx, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm6, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm6
+; NoVLX-NEXT: movl %ecx, %edx
+; NoVLX-NEXT: shrl $16, %edx
; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rdx
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm0
+; NoVLX-NEXT: movl %edx, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rdx, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm1, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm8
+; NoVLX-NEXT: movl %ecx, %edx
+; NoVLX-NEXT: shrl $16, %edx
+; NoVLX-NEXT: vmovd %ecx, %xmm0
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rcx, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rdx
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movl %edx, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rdx, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: movl %ecx, %edx
+; NoVLX-NEXT: shrl $16, %edx
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm7
+; NoVLX-NEXT: movl %eax, %edx
+; NoVLX-NEXT: shrl $16, %edx
+; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm9, %rdx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movl %edx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rdx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm8, %rcx
+; NoVLX-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm1, %rax
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: movl %eax, %edx
+; NoVLX-NEXT: shrl $16, %edx
+; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rdx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: vpinsrw $3, %eax, %xmm3, %xmm1
+; NoVLX-NEXT: movl %edx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rdx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm1, %xmm1
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm8, %ymm3
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm7, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: andl %edi, %ecx
; NoVLX-NEXT: shrl $16, %edi
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm10, %ymm5, %ymm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqw %ymm0, %ymm2, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k2
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: shll $16, %eax
-; NoVLX-NEXT: orl %ecx, %eax
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %edx
+; NoVLX-NEXT: andl %edi, %edx
+; NoVLX-NEXT: shll $16, %edx
+; NoVLX-NEXT: movzwl %cx, %eax
+; NoVLX-NEXT: orl %edx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -1454,103 +1453,104 @@ define zeroext i64 @test_masked_vpcmpeqw
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm1
+; NoVLX-NEXT: vmovd %eax, %xmm3
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm1, %rax
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: movl %edx, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rdx, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: movl %ecx, %edx
+; NoVLX-NEXT: shrl $16, %edx
; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rdx
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movl %edx, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rdx, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %eax, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %eax, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: shrl $16, %edi
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm1, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm1
; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k2
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: andl %edi, %eax
+; NoVLX-NEXT: shrl $16, %edi
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: shll $16, %ecx
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
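
For the v32i1 functions there is no 32-bit mask register on the NoVLX
target, so the compare is split into two 16-lane halves both before and
after the patch; what changes is that each half's kmovw result is now
ANDed with the matching half of %edi in GPRs (andl for the low half,
shrl $16 then andl for the high half) before the halves are glued back
together with shll $16/orl. The IR shape being compiled is roughly this
(an illustrative sketch, not copied from the test file):

define i64 @masked_cmp_v32_sketch(i32 %mask, <32 x i16> %a, <32 x i16> %b) {
entry:
  %cmp = icmp eq <32 x i16> %a, %b
  ; 32-bit GPR mask ANDed in through a <32 x i1> bitcast
  %m = bitcast i32 %mask to <32 x i1>
  %and = and <32 x i1> %cmp, %m
  %r = bitcast <32 x i1> %and to i32
  %z = zext i32 %r to i64
  ret i64 %z
}
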
@@ -2935,9 +2935,9 @@ define zeroext i32 @test_masked_vpcmpeqd
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -2962,9 +2962,9 @@ define zeroext i32 @test_masked_vpcmpeqd
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -3016,9 +3016,9 @@ define zeroext i32 @test_masked_vpcmpeqd
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -3095,10 +3095,9 @@ define zeroext i64 @test_masked_vpcmpeqd
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -3123,10 +3122,9 @@ define zeroext i64 @test_masked_vpcmpeqd
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -3179,10 +3177,9 @@ define zeroext i64 @test_masked_vpcmpeqd
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
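
In these i64-result variants the explicit movzwl also disappears: kmovw
already zero-extends into its 32-bit destination, and ANDing with the mask
cannot set the cleared upper bits, so the zero-extension to i64 comes for
free. The shape is roughly (an illustrative sketch):

define i64 @masked_cmpd_sketch(i16 %mask, <16 x i32> %a, <16 x i32> %b) {
entry:
  %cmp = icmp eq <16 x i32> %a, %b
  %m = bitcast i16 %mask to <16 x i1>   ; mask stays in a GPR after the combine
  %and = and <16 x i1> %cmp, %m
  %r = bitcast <16 x i1> %and to i16
  %z = zext i16 %r to i64               ; previously needed the movzwl
  ret i64 %z
}
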
@@ -5459,9 +5456,9 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -5487,9 +5484,9 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -5568,10 +5565,9 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -5597,10 +5593,9 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -5690,18 +5685,19 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: shrl $16, %edi
; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
+; NoVLX-NEXT: shrl $16, %edi
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k2
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: shll $16, %ecx
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -5727,18 +5723,19 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: shrl $16, %edi
; NoVLX-NEXT: vpcmpgtb (%rsi), %ymm0, %ymm0
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
+; NoVLX-NEXT: shrl $16, %edi
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k2
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: shll $16, %ecx
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -6152,9 +6149,9 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -6181,9 +6178,9 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -6265,10 +6262,9 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -6295,10 +6291,9 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -6643,189 +6638,190 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm9
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: vmovd %eax, %xmm4
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm5
+; NoVLX-NEXT: vmovq %xmm9, %rax
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm4
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rdx
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: movl %edx, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rdx, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm2, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm10
+; NoVLX-NEXT: movl %ecx, %edx
+; NoVLX-NEXT: shrl $16, %edx
; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm5, %xmm5
+; NoVLX-NEXT: movq %rcx, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: movl %edx, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rdx, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm7, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovq %xmm6, %rcx
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm5
+; NoVLX-NEXT: movl %ecx, %edx
+; NoVLX-NEXT: shrl $16, %edx
; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rdx
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: movl %edx, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rdx, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm6, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm6
+; NoVLX-NEXT: movl %ecx, %edx
+; NoVLX-NEXT: shrl $16, %edx
; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rdx
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm0
+; NoVLX-NEXT: movl %edx, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rdx, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm1, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm8
+; NoVLX-NEXT: movl %ecx, %edx
+; NoVLX-NEXT: shrl $16, %edx
+; NoVLX-NEXT: vmovd %ecx, %xmm0
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rcx, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rdx
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movl %edx, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rdx, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: movl %ecx, %edx
+; NoVLX-NEXT: shrl $16, %edx
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm7
+; NoVLX-NEXT: movl %eax, %edx
+; NoVLX-NEXT: shrl $16, %edx
+; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm9, %rdx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movl %edx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rdx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm8, %rcx
+; NoVLX-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm1, %rax
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: movl %eax, %edx
+; NoVLX-NEXT: shrl $16, %edx
+; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rdx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: vpinsrw $3, %eax, %xmm3, %xmm1
+; NoVLX-NEXT: movl %edx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rdx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm1, %xmm1
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm8, %ymm3
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm7, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: andl %edi, %ecx
; NoVLX-NEXT: shrl $16, %edi
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm10, %ymm5, %ymm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k2
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: shll $16, %eax
-; NoVLX-NEXT: orl %ecx, %eax
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %edx
+; NoVLX-NEXT: andl %edi, %edx
+; NoVLX-NEXT: shll $16, %edx
+; NoVLX-NEXT: movzwl %cx, %eax
+; NoVLX-NEXT: orl %edx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -6850,103 +6846,104 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm1
+; NoVLX-NEXT: vmovd %eax, %xmm3
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm1, %rax
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: movl %edx, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rdx, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: movl %ecx, %edx
+; NoVLX-NEXT: shrl $16, %edx
; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rdx
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movl %edx, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rdx, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %eax, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %eax, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: shrl $16, %edi
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm1, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm1
; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k2
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: andl %edi, %eax
+; NoVLX-NEXT: shrl $16, %edi
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: shll $16, %ecx
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -8331,9 +8328,9 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -8358,9 +8355,9 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -8412,9 +8409,9 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -8491,10 +8488,9 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -8519,10 +8515,9 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -8575,10 +8570,9 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -10862,9 +10856,9 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -10893,9 +10887,9 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -10981,10 +10975,9 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -11013,10 +11006,9 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -11113,21 +11105,22 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: shrl $16, %edi
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
+; NoVLX-NEXT: shrl $16, %edi
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k2
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: shll $16, %ecx
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -11153,22 +11146,23 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: shrl $16, %edi
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
+; NoVLX-NEXT: shrl $16, %edi
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k2
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: shll $16, %ecx
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -11619,9 +11613,9 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -11651,9 +11645,9 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -11742,10 +11736,9 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -11775,10 +11768,9 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12133,193 +12125,194 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm9
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: vmovd %eax, %xmm4
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vmovq %xmm9, %rax
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rdx
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
+; NoVLX-NEXT: movl %edx, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rdx, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm2, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm7, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm10
+; NoVLX-NEXT: movl %ecx, %edx
+; NoVLX-NEXT: shrl $16, %edx
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rcx, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
+; NoVLX-NEXT: movl %edx, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rdx, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm6, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm4
+; NoVLX-NEXT: movl %ecx, %edx
+; NoVLX-NEXT: shrl $16, %edx
; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rdx
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: movl %edx, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rdx, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm1, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm6
+; NoVLX-NEXT: movl %ecx, %edx
+; NoVLX-NEXT: shrl $16, %edx
; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rdx
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm0
+; NoVLX-NEXT: movl %edx, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rdx, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm8
+; NoVLX-NEXT: movl %ecx, %edx
+; NoVLX-NEXT: shrl $16, %edx
+; NoVLX-NEXT: vmovd %ecx, %xmm0
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rcx, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rdx
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movl %edx, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rdx, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm8, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm5, %rcx
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: movl %ecx, %edx
+; NoVLX-NEXT: shrl $16, %edx
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm7
+; NoVLX-NEXT: movl %eax, %edx
+; NoVLX-NEXT: shrl $16, %edx
+; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm9, %rdx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movl %edx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rdx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm1, %rax
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: movl %eax, %edx
+; NoVLX-NEXT: shrl $16, %edx
+; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rdx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $3, %eax, %xmm3, %xmm1
+; NoVLX-NEXT: movl %edx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rdx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm1, %xmm1
+; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm8, %ymm3
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
+; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm7, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: andl %edi, %ecx
; NoVLX-NEXT: shrl $16, %edi
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
-; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm10, %ymm4, %ymm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k2
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: shll $16, %eax
-; NoVLX-NEXT: orl %ecx, %eax
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %edx
+; NoVLX-NEXT: andl %edi, %edx
+; NoVLX-NEXT: shll $16, %edx
+; NoVLX-NEXT: movzwl %cx, %eax
+; NoVLX-NEXT: orl %edx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12344,109 +12337,110 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm1
+; NoVLX-NEXT: vmovd %eax, %xmm3
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm1, %rax
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: movl %edx, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rdx, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: movl %ecx, %edx
+; NoVLX-NEXT: shrl $16, %edx
; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rdx
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movl %edx, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rdx, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %eax, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %eax, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: shrl $16, %edi
-; NoVLX-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm1
-; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm1
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: kmovw %edi, %k2
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k2}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: andl %edi, %eax
+; NoVLX-NEXT: shrl $16, %edi
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm0
+; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: shll $16, %ecx
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -13831,9 +13825,9 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -13858,9 +13852,9 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -13912,9 +13906,9 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -13991,10 +13985,9 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -14019,10 +14012,9 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -14075,10 +14067,9 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -16364,9 +16355,9 @@ define zeroext i32 @test_masked_vpcmpult
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -16395,9 +16386,9 @@ define zeroext i32 @test_masked_vpcmpult
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -16485,10 +16476,9 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -16517,10 +16507,9 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -16616,21 +16605,22 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: shrl $16, %edi
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
+; NoVLX-NEXT: shrl $16, %edi
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k2
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: shll $16, %ecx
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -16656,21 +16646,22 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: shrl $16, %edi
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
+; NoVLX-NEXT: shrl $16, %edi
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k2
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: shll $16, %ecx
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -17129,9 +17120,9 @@ define zeroext i32 @test_masked_vpcmpult
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17161,9 +17152,9 @@ define zeroext i32 @test_masked_vpcmpult
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17254,10 +17245,9 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17287,10 +17277,9 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17645,194 +17634,195 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm9
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: vmovd %eax, %xmm4
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm5
+; NoVLX-NEXT: vmovq %xmm9, %rax
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm4
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rdx
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
-; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
+; NoVLX-NEXT: movl %edx, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rdx, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm2, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm10
+; NoVLX-NEXT: movl %ecx, %edx
+; NoVLX-NEXT: shrl $16, %edx
; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm5, %xmm5
+; NoVLX-NEXT: movq %rcx, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: movl %edx, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rdx, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm7, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovq %xmm6, %rcx
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm5
+; NoVLX-NEXT: movl %ecx, %edx
+; NoVLX-NEXT: shrl $16, %edx
; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rdx
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: movl %edx, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: movq %rdx, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm6, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm6
+; NoVLX-NEXT: movl %ecx, %edx
+; NoVLX-NEXT: shrl $16, %edx
; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rdx
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm0
+; NoVLX-NEXT: movl %edx, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rdx, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm1, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm8
+; NoVLX-NEXT: movl %ecx, %edx
+; NoVLX-NEXT: shrl $16, %edx
+; NoVLX-NEXT: vmovd %ecx, %xmm0
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rcx, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rdx
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movl %edx, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: movq %rdx, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: movl %ecx, %edx
+; NoVLX-NEXT: shrl $16, %edx
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rcx, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm7
+; NoVLX-NEXT: movl %eax, %edx
+; NoVLX-NEXT: shrl $16, %edx
+; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm9, %rdx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movl %edx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rdx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm8, %rcx
+; NoVLX-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm1, %rax
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: movl %eax, %edx
+; NoVLX-NEXT: shrl $16, %edx
+; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rdx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: vpinsrw $3, %eax, %xmm3, %xmm1
+; NoVLX-NEXT: movl %edx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rdx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm1, %xmm1
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm8, %ymm3
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm2, %ymm3, %ymm3
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm7, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
-; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: andl %edi, %ecx
; NoVLX-NEXT: shrl $16, %edi
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2
-; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
-; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3
-; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpxor %ymm4, %ymm0, %ymm0
-; NoVLX-NEXT: vpxor %ymm4, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vinserti128 $1, %xmm10, %ymm5, %ymm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm3, %ymm1
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k2
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 {%k2}
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: shll $16, %eax
-; NoVLX-NEXT: orl %ecx, %eax
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %edx
+; NoVLX-NEXT: andl %edi, %edx
+; NoVLX-NEXT: shll $16, %edx
+; NoVLX-NEXT: movzwl %cx, %eax
+; NoVLX-NEXT: orl %edx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17858,107 +17848,108 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rax
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
+; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: vmovd %eax, %xmm3
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm1, %rax
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: movl %edx, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rdx, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: movl %ecx, %edx
+; NoVLX-NEXT: shrl $16, %edx
; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rdx
+; NoVLX-NEXT: shrq $32, %rdx
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rdx
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movl %edx, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rdx, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %eax, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %eax, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm2, %rcx
+; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: shrl $16, %edi
+; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm1
-; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
-; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
-; NoVLX-NEXT: vpxor 32(%rsi), %ymm2, %ymm3
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
-; NoVLX-NEXT: vpxor (%rsi), %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm4
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm4, %ymm0
+; NoVLX-NEXT: shrq $48, %rdx
+; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm3
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: kmovw %edi, %k2
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: andl %edi, %eax
+; NoVLX-NEXT: shrl $16, %edi
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm0
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor 32(%rsi), %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: shll $16, %ecx
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
@@ -19343,9 +19334,9 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -19370,9 +19361,9 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -19424,9 +19415,9 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -19503,10 +19494,9 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -19531,10 +19521,9 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -19587,10 +19576,9 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -23188,9 +23176,9 @@ define zeroext i32 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -23215,9 +23203,9 @@ define zeroext i32 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -23243,9 +23231,9 @@ define zeroext i32 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -23390,10 +23378,9 @@ define zeroext i64 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -23418,10 +23405,9 @@ define zeroext i64 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -23447,10 +23433,9 @@ define zeroext i64 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-vec-test-testn.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-vec-test-testn.ll?rev=324184&r1=324183&r2=324184&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-vec-test-testn.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-vec-test-testn.ll Sat Feb 3 17:43:48 2018
@@ -194,19 +194,18 @@ entry:
define zeroext i8 @TEST_mm256_mask_test_epi32_mask(i8 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
; X86_64-LABEL: TEST_mm256_mask_test_epi32_mask:
; X86_64: # %bb.0: # %entry
-; X86_64-NEXT: kmovw %edi, %k1
-; X86_64-NEXT: vptestmd %ymm0, %ymm1, %k0 {%k1}
+; X86_64-NEXT: vptestmd %ymm0, %ymm1, %k0
; X86_64-NEXT: kmovw %k0, %eax
+; X86_64-NEXT: andb %dil, %al
; X86_64-NEXT: # kill: def $al killed $al killed $eax
; X86_64-NEXT: vzeroupper
; X86_64-NEXT: retq
;
; I386-LABEL: TEST_mm256_mask_test_epi32_mask:
; I386: # %bb.0: # %entry
-; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; I386-NEXT: kmovw %eax, %k1
-; I386-NEXT: vptestmd %ymm0, %ymm1, %k0 {%k1}
+; I386-NEXT: vptestmd %ymm0, %ymm1, %k0
; I386-NEXT: kmovw %k0, %eax
+; I386-NEXT: andb {{[0-9]+}}(%esp), %al
; I386-NEXT: # kill: def $al killed $al killed $eax
; I386-NEXT: vzeroupper
; I386-NEXT: retl
@@ -412,19 +411,18 @@ entry:
define zeroext i8 @TEST_mm256_mask_testn_epi32_mask(i8 %__U, <4 x i64> %__A, <4 x i64> %__B) local_unnamed_addr #0 {
; X86_64-LABEL: TEST_mm256_mask_testn_epi32_mask:
; X86_64: # %bb.0: # %entry
-; X86_64-NEXT: kmovw %edi, %k1
-; X86_64-NEXT: vptestnmd %ymm0, %ymm1, %k0 {%k1}
+; X86_64-NEXT: vptestnmd %ymm0, %ymm1, %k0
; X86_64-NEXT: kmovw %k0, %eax
+; X86_64-NEXT: andb %dil, %al
; X86_64-NEXT: # kill: def $al killed $al killed $eax
; X86_64-NEXT: vzeroupper
; X86_64-NEXT: retq
;
; I386-LABEL: TEST_mm256_mask_testn_epi32_mask:
; I386: # %bb.0: # %entry
-; I386-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; I386-NEXT: kmovw %eax, %k1
-; I386-NEXT: vptestnmd %ymm0, %ymm1, %k0 {%k1}
+; I386-NEXT: vptestnmd %ymm0, %ymm1, %k0
; I386-NEXT: kmovw %k0, %eax
+; I386-NEXT: andb {{[0-9]+}}(%esp), %al
; I386-NEXT: # kill: def $al killed $al killed $eax
; I386-NEXT: vzeroupper
; I386-NEXT: retl
Modified: llvm/trunk/test/CodeGen/X86/combine-testm-and.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-testm-and.ll?rev=324184&r1=324183&r2=324184&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-testm-and.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-testm-and.ll Sat Feb 3 17:43:48 2018
@@ -17,9 +17,10 @@ define i32 @combineTESTM_AND_1(<8 x i64>
define i32 @combineTESTM_AND_2(<8 x i64> %a, <8 x i64> %b , i8 %mask) {
; CHECK-LABEL: combineTESTM_AND_2:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vptestmq %zmm0, %zmm1, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vptestmq %zmm0, %zmm1, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: andb %dil, %al
+; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%and.i = and <8 x i64> %b, %a
@@ -31,9 +32,10 @@ define i32 @combineTESTM_AND_2(<8 x i64>
define i32 @combineTESTM_AND_mask_3(<8 x i64> %a, <8 x i64>* %bptr , i8 %mask) {
; CHECK-LABEL: combineTESTM_AND_mask_3:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %esi, %k1
-; CHECK-NEXT: vptestmq (%rdi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vptestmq (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: andb %sil, %al
+; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%b = load <8 x i64>, <8 x i64>* %bptr
@@ -46,9 +48,10 @@ define i32 @combineTESTM_AND_mask_3(<8 x
define i32 @combineTESTM_AND_mask_4(<8 x i64> %a, <8 x i64>* %bptr , i8 %mask) {
; CHECK-LABEL: combineTESTM_AND_mask_4:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovd %esi, %k1
-; CHECK-NEXT: vptestmq (%rdi), %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: vptestmq (%rdi), %zmm0, %k0
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: andb %sil, %al
+; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%b = load <8 x i64>, <8 x i64>* %bptr
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll?rev=324184&r1=324183&r2=324184&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll Sat Feb 3 17:43:48 2018
@@ -540,7 +540,7 @@ define i32 @stack_fold_pcmpeqw(<32 x i16
ret i32 %3
}
-define i16 @stack_fold_pcmpeqd_mask(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %a2, i16 %mask) {
+define <16 x i32> @stack_fold_pcmpeqd_mask(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %a2, i16 %mask, <16 x i32> %b0, <16 x i32> %b1) {
;CHECK-LABEL: stack_fold_pcmpeqd_mask
;CHECK: vpcmpeqd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%k[0-7]}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
@@ -550,11 +550,11 @@ define i16 @stack_fold_pcmpeqd_mask(<16
%4 = bitcast i16 %mask to <16 x i1>
%5 = icmp eq <16 x i32> %3, %a0
%6 = and <16 x i1> %4, %5
- %7 = bitcast <16 x i1> %6 to i16
- ret i16 %7
+ %7 = select <16 x i1> %6, <16 x i32> %b0, <16 x i32> %b1
+ ret <16 x i32> %7
}
-define i16 @stack_fold_pcmpeqd_mask_commuted(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %a2, i16 %mask) {
+define <16 x i32> @stack_fold_pcmpeqd_mask_commuted(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %a2, i16 %mask, <16 x i32> %b0, <16 x i32> %b1) {
;CHECK-LABEL: stack_fold_pcmpeqd_mask_commuted
;CHECK: vpcmpeqd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%k[0-7]}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
@@ -564,11 +564,11 @@ define i16 @stack_fold_pcmpeqd_mask_comm
%4 = bitcast i16 %mask to <16 x i1>
%5 = icmp eq <16 x i32> %a0, %3
%6 = and <16 x i1> %4, %5
- %7 = bitcast <16 x i1> %6 to i16
- ret i16 %7
+ %7 = select <16 x i1> %6, <16 x i32> %b0, <16 x i32> %b1
+ ret <16 x i32> %7
}
-define i16 @stack_fold_pcmpled_mask(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %a2, i16 %mask) {
+define <16 x i32> @stack_fold_pcmpled_mask(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %a2, i16 %mask, <16 x i32> %b0, <16 x i32> %b1) {
;CHECK-LABEL: stack_fold_pcmpled_mask
;CHECK: vpcmpled {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%k[0-7]}} {{{%k[0-7]}}} {{.*#+}} 64-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
@@ -578,8 +578,8 @@ define i16 @stack_fold_pcmpled_mask(<16
%4 = bitcast i16 %mask to <16 x i1>
%5 = icmp sge <16 x i32> %a0, %3
%6 = and <16 x i1> %4, %5
- %7 = bitcast <16 x i1> %6 to i16
- ret i16 %7
+ %7 = select <16 x i1> %6, <16 x i32> %b0, <16 x i32> %b1
+ ret <16 x i32> %7
}
define i16 @stack_fold_pcmpleud(<16 x i32> %a0, <16 x i32> %a1, <16 x i32>* %a2, i16 %mask) {
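For illustration, here is a minimal IR sketch of the pattern these test diffs exercise (the function name @example_masked_cmp is hypothetical, not from the tree; the body mirrors the bitcast/and/bitcast sequence visible in the stack_fold_pcmpeqd_mask tests above). Before this revision, the i16 mask was moved into a k-register (kmovw %edi, %k1) so the AND could be folded into a masked compare; with the new combine, the compare writes k0 unmasked and the AND is done in a GPR (andl %edi, %eax), as the hunks above show:

; Sketch of the GPR-mask/k-mask round trip the combine targets.
define i16 @example_masked_cmp(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
entry:
  ; i16 mask enters as a GPR value, bitcast to a <16 x i1> mask vector.
  %m = bitcast i16 %mask to <16 x i1>
  %cmp = icmp eq <16 x i32> %a, %b
  ; AND of the two masks; the result is immediately bitcast back to i16,
  ; so the combine can instead AND the i16 values directly in a GPR.
  %and = and <16 x i1> %m, %cmp
  %res = bitcast <16 x i1> %and to i16
  ret i16 %res
}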