[llvm] b0b5c54 - [X86] Refactor MatchVectorAllZeroTest -> MatchVectorAllEqualTest
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 1 03:08:31 PDT 2023
Author: Simon Pilgrim
Date: 2023-04-01T11:08:13+01:00
New Revision: b0b5c546e5c9bc4ad1fbd781146046f3845ebb07
URL: https://github.com/llvm/llvm-project/commit/b0b5c546e5c9bc4ad1fbd781146046f3845ebb07
DIFF: https://github.com/llvm/llvm-project/commit/b0b5c546e5c9bc4ad1fbd781146046f3845ebb07.diff
LOG: [X86] Refactor MatchVectorAllZeroTest -> MatchVectorAllEqualTest
Refactor MatchVectorAllZeroTest to work with allof/anyof/noneof comparisons with -1 as well as 0, and use it to handle icmp(bitcast(icmp_eq(X,Y)),-1) reduction patterns.
We're still missing test coverage for other reduction types, so we have kept these limited to icmp X,0 for now.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/avx512-mask-op.ll
llvm/test/CodeGen/X86/movmsk-cmp.ll
llvm/test/CodeGen/X86/vector-compare-all_of.ll
llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5ad96013be071..eacce087630d8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24427,39 +24427,50 @@ static SDValue LowerVectorAllZero(const SDLoc &DL, SDValue V, ISD::CondCode CC,
return LowerVectorAllEqual(DL, V, Z, CC, Mask, Subtarget, DAG, X86CC);
}
-// Check whether an OR'd reduction tree is PTEST-able, or if we can fallback to
-// CMP(MOVMSK(PCMPEQB(X,0))).
-static SDValue MatchVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
- const SDLoc &DL,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG, X86::CondCode &X86CC) {
+// Check whether an AND/OR'd reduction tree is PTEST-able, or if we can fallback
+// to CMP(MOVMSK(PCMPEQB(X,Y))).
+static SDValue MatchVectorAllEqualTest(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, const SDLoc &DL,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG,
+ X86::CondCode &X86CC) {
assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Unsupported ISD::CondCode");
+ bool CmpNull = isNullConstant(RHS);
+ bool CmpAllOnes = isAllOnesConstant(RHS);
+ if (!CmpNull && !CmpAllOnes)
+ return SDValue();
+
+ SDValue Op = LHS;
if (!Subtarget.hasSSE2() || !Op->hasOneUse())
return SDValue();
// Check whether we're masking/truncating an OR-reduction result, in which
// case track the masked bits.
+ // TODO: Add CmpAllOnes support.
APInt Mask = APInt::getAllOnes(Op.getScalarValueSizeInBits());
- switch (Op.getOpcode()) {
- case ISD::TRUNCATE: {
- SDValue Src = Op.getOperand(0);
- Mask = APInt::getLowBitsSet(Src.getScalarValueSizeInBits(),
- Op.getScalarValueSizeInBits());
- Op = Src;
- break;
- }
- case ISD::AND: {
- if (auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- Mask = Cst->getAPIntValue();
- Op = Op.getOperand(0);
+ if (CmpNull) {
+ switch (Op.getOpcode()) {
+ case ISD::TRUNCATE: {
+ SDValue Src = Op.getOperand(0);
+ Mask = APInt::getLowBitsSet(Src.getScalarValueSizeInBits(),
+ Op.getScalarValueSizeInBits());
+ Op = Src;
+ break;
+ }
+ case ISD::AND: {
+ if (auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Mask = Cst->getAPIntValue();
+ Op = Op.getOperand(0);
+ }
+ break;
+ }
}
- break;
- }
}
+ // TODO: Add CmpAllOnes support.
SmallVector<SDValue, 8> VecIns;
- if (Op.getOpcode() == ISD::OR && matchScalarReduction(Op, ISD::OR, VecIns)) {
+ if (CmpNull && Op.getOpcode() == ISD::OR && matchScalarReduction(Op, ISD::OR, VecIns)) {
EVT VT = VecIns[0].getValueType();
assert(llvm::all_of(VecIns,
[VT](SDValue V) { return VT == V.getValueType(); }) &&
@@ -24485,7 +24496,8 @@ static SDValue MatchVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
return V;
}
- if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ // TODO: Add CmpAllOnes support.
+ if (CmpNull && Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
ISD::NodeType BinOp;
if (SDValue Match =
DAG.matchBinOpReduction(Op.getNode(), BinOp, {ISD::OR})) {
@@ -24496,7 +24508,7 @@ static SDValue MatchVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
}
// Match icmp(bitcast(icmp_ne(X,Y)),0) reduction patterns.
- // TODO: Expand to icmp(bitcast(icmp_eq(X,Y)),-1) patterns.
+ // Match icmp(bitcast(icmp_eq(X,Y)),-1) reduction patterns.
if (Mask.isAllOnes()) {
assert(!Op.getValueType().isVector() &&
"Illegal vector type for reduction pattern");
@@ -24505,7 +24517,7 @@ static SDValue MatchVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
Src.getValueType().isFixedLengthVector() &&
Src.getValueType().getScalarType() == MVT::i1) {
ISD::CondCode SrcCC = cast<CondCodeSDNode>(Src.getOperand(2))->get();
- if (SrcCC == ISD::SETNE) {
+ if (SrcCC == (CmpNull ? ISD::SETNE : ISD::SETEQ)) {
SDValue LHS = Src.getOperand(0);
SDValue RHS = Src.getOperand(1);
EVT LHSVT = LHS.getValueType();
@@ -25710,14 +25722,12 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
}
}
- // Try to use PTEST/PMOVMSKB for a tree ORs equality compared with 0.
- // TODO: We could do AND tree with all 1s as well by using the C flag.
- if (isNullConstant(Op1))
- if (SDValue CmpZ = MatchVectorAllZeroTest(Op0, CC, dl, Subtarget, DAG,
- X86CondCode)) {
- X86CC = DAG.getTargetConstant(X86CondCode, dl, MVT::i8);
- return CmpZ;
- }
+ // Try to use PTEST/PMOVMSKB for a tree AND/ORs equality compared with -1/0.
+ if (SDValue CmpZ = MatchVectorAllEqualTest(Op0, Op1, CC, dl, Subtarget, DAG,
+ X86CondCode)) {
+ X86CC = DAG.getTargetConstant(X86CondCode, dl, MVT::i8);
+ return CmpZ;
+ }
// Try to lower using KORTEST or KTEST.
if (SDValue Test = EmitAVX512Test(Op0, Op1, CC, dl, DAG, Subtarget, X86CC))
@@ -54221,10 +54231,10 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
Subtarget))
return V;
- if (VT == MVT::i1 && isNullConstant(RHS)) {
+ if (VT == MVT::i1) {
X86::CondCode X86CC;
if (SDValue V =
- MatchVectorAllZeroTest(LHS, CC, DL, Subtarget, DAG, X86CC))
+ MatchVectorAllEqualTest(LHS, RHS, CC, DL, Subtarget, DAG, X86CC))
return DAG.getNode(ISD::TRUNCATE, DL, VT, getSETCC(X86CC, V, DL, DAG));
}
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 7ac36cd49439b..1c397899f4f91 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -4098,14 +4098,14 @@ bb.2:
}
declare void @foo()
-; Make sure we can use the C flag from kortest to check for all ones.
+; Make sure we can use the ZF/CF flags from kortest to check for all ones.
define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) {
; CHECK-LABEL: ktest_allones:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0
-; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k0
+; CHECK-NEXT: vptestmd %zmm0, %zmm0, %k0
; CHECK-NEXT: kortestw %k0, %k0
-; CHECK-NEXT: jb LBB67_2
+; CHECK-NEXT: je LBB67_2
; CHECK-NEXT: ## %bb.1: ## %bb.1
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
@@ -4119,9 +4119,9 @@ define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) {
; X86-LABEL: ktest_allones:
; X86: ## %bb.0:
; X86-NEXT: vpord %zmm1, %zmm0, %zmm0
-; X86-NEXT: vptestnmd %zmm0, %zmm0, %k0
+; X86-NEXT: vptestmd %zmm0, %zmm0, %k0
; X86-NEXT: kortestw %k0, %k0
-; X86-NEXT: jb LBB67_2
+; X86-NEXT: je LBB67_2
; X86-NEXT: ## %bb.1: ## %bb.1
; X86-NEXT: subl $12, %esp
; X86-NEXT: .cfi_def_cfa_offset 16
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index ecd2df46bc6aa..2782bac923f40 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -947,7 +947,7 @@ define i1 @allzeros_v16i8_not(<16 x i8> %a0) {
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
; SSE2-NEXT: pmovmskb %xmm1, %eax
-; SSE2-NEXT: cmpw $-1, %ax
+; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT: setne %al
; SSE2-NEXT: retq
;
@@ -957,24 +957,11 @@ define i1 @allzeros_v16i8_not(<16 x i8> %a0) {
; SSE41-NEXT: setne %al
; SSE41-NEXT: retq
;
-; AVX1OR2-LABEL: allzeros_v16i8_not:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vptest %xmm0, %xmm0
-; AVX1OR2-NEXT: setne %al
-; AVX1OR2-NEXT: retq
-;
-; KNL-LABEL: allzeros_v16i8_not:
-; KNL: # %bb.0:
-; KNL-NEXT: vptest %xmm0, %xmm0
-; KNL-NEXT: setne %al
-; KNL-NEXT: retq
-;
-; SKX-LABEL: allzeros_v16i8_not:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmb %xmm0, %xmm0, %k0
-; SKX-NEXT: kortestw %k0, %k0
-; SKX-NEXT: setae %al
-; SKX-NEXT: retq
+; AVX-LABEL: allzeros_v16i8_not:
+; AVX: # %bb.0:
+; AVX-NEXT: vptest %xmm0, %xmm0
+; AVX-NEXT: setne %al
+; AVX-NEXT: retq
%1 = icmp eq <16 x i8> %a0, zeroinitializer
%2 = bitcast <16 x i1> %1 to i16
%3 = icmp ne i16 %2, -1
@@ -986,10 +973,8 @@ define i1 @allzeros_v2i64_not(<2 x i64> %a0) {
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
-; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: movmskpd %xmm0, %eax
-; SSE2-NEXT: cmpb $3, %al
+; SSE2-NEXT: movmskps %xmm1, %eax
+; SSE2-NEXT: xorl $15, %eax
; SSE2-NEXT: setne %al
; SSE2-NEXT: retq
;
@@ -999,29 +984,11 @@ define i1 @allzeros_v2i64_not(<2 x i64> %a0) {
; SSE41-NEXT: setne %al
; SSE41-NEXT: retq
;
-; AVX1OR2-LABEL: allzeros_v2i64_not:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vptest %xmm0, %xmm0
-; AVX1OR2-NEXT: setne %al
-; AVX1OR2-NEXT: retq
-;
-; KNL-LABEL: allzeros_v2i64_not:
-; KNL: # %bb.0:
-; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: testb $3, %al
-; KNL-NEXT: setne %al
-; KNL-NEXT: vzeroupper
-; KNL-NEXT: retq
-;
-; SKX-LABEL: allzeros_v2i64_not:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %xmm0, %xmm0, %k0
-; SKX-NEXT: kmovd %k0, %eax
-; SKX-NEXT: cmpb $3, %al
-; SKX-NEXT: setne %al
-; SKX-NEXT: retq
+; AVX-LABEL: allzeros_v2i64_not:
+; AVX: # %bb.0:
+; AVX-NEXT: vptest %xmm0, %xmm0
+; AVX-NEXT: setne %al
+; AVX-NEXT: retq
%1 = icmp eq <2 x i64> %a0, zeroinitializer
%2 = bitcast <2 x i1> %1 to i2
%3 = icmp ne i2 %2, -1
@@ -1029,51 +996,29 @@ define i1 @allzeros_v2i64_not(<2 x i64> %a0) {
}
define i1 @allzeros_v8i32_not(<8 x i32> %a0) {
-; SSE-LABEL: allzeros_v8i32_not:
-; SSE: # %bb.0:
-; SSE-NEXT: pxor %xmm2, %xmm2
-; SSE-NEXT: pcmpeqd %xmm2, %xmm1
-; SSE-NEXT: pcmpeqd %xmm2, %xmm0
-; SSE-NEXT: packssdw %xmm1, %xmm0
-; SSE-NEXT: packsswb %xmm0, %xmm0
-; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: cmpb $-1, %al
-; SSE-NEXT: setne %al
-; SSE-NEXT: retq
-;
-; AVX1-LABEL: allzeros_v8i32_not:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vptest %xmm0, %xmm0
-; AVX1-NEXT: setne %al
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: allzeros_v8i32_not:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vptest %ymm0, %ymm0
-; AVX2-NEXT: setne %al
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
+; SSE2-LABEL: allzeros_v8i32_not:
+; SSE2: # %bb.0:
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE2-NEXT: movmskps %xmm1, %eax
+; SSE2-NEXT: xorl $15, %eax
+; SSE2-NEXT: setne %al
+; SSE2-NEXT: retq
;
-; KNL-LABEL: allzeros_v8i32_not:
-; KNL: # %bb.0:
-; KNL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; KNL-NEXT: vptestnmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: cmpb $-1, %al
-; KNL-NEXT: setne %al
-; KNL-NEXT: vzeroupper
-; KNL-NEXT: retq
+; SSE41-LABEL: allzeros_v8i32_not:
+; SSE41: # %bb.0:
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: ptest %xmm0, %xmm0
+; SSE41-NEXT: setne %al
+; SSE41-NEXT: retq
;
-; SKX-LABEL: allzeros_v8i32_not:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k0
-; SKX-NEXT: kortestb %k0, %k0
-; SKX-NEXT: setae %al
-; SKX-NEXT: vzeroupper
-; SKX-NEXT: retq
+; AVX-LABEL: allzeros_v8i32_not:
+; AVX: # %bb.0:
+; AVX-NEXT: vptest %ymm0, %ymm0
+; AVX-NEXT: setne %al
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
%1 = icmp eq <8 x i32> %a0, zeroinitializer
%2 = bitcast <8 x i1> %1 to i8
%3 = icmp ne i8 %2, -1
@@ -1083,90 +1028,48 @@ define i1 @allzeros_v8i32_not(<8 x i32> %a0) {
define i1 @allzeros_v8i64_not(<8 x i64> %a0) {
; SSE2-LABEL: allzeros_v8i64_not:
; SSE2: # %bb.0:
-; SSE2-NEXT: pxor %xmm4, %xmm4
-; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,0,3,2]
-; SSE2-NEXT: pand %xmm3, %xmm5
-; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
-; SSE2-NEXT: pand %xmm2, %xmm3
-; SSE2-NEXT: packssdw %xmm5, %xmm3
-; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
-; SSE2-NEXT: pand %xmm1, %xmm2
-; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: packssdw %xmm2, %xmm1
-; SSE2-NEXT: packssdw %xmm3, %xmm1
-; SSE2-NEXT: packsswb %xmm1, %xmm1
-; SSE2-NEXT: pmovmskb %xmm1, %eax
-; SSE2-NEXT: cmpb $-1, %al
+; SSE2-NEXT: por %xmm3, %xmm1
+; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE2-NEXT: movmskps %xmm1, %eax
+; SSE2-NEXT: xorl $15, %eax
; SSE2-NEXT: setne %al
; SSE2-NEXT: retq
;
; SSE41-LABEL: allzeros_v8i64_not:
; SSE41: # %bb.0:
-; SSE41-NEXT: pxor %xmm4, %xmm4
-; SSE41-NEXT: pcmpeqq %xmm4, %xmm3
-; SSE41-NEXT: pcmpeqq %xmm4, %xmm2
-; SSE41-NEXT: packssdw %xmm3, %xmm2
-; SSE41-NEXT: pcmpeqq %xmm4, %xmm1
-; SSE41-NEXT: pcmpeqq %xmm4, %xmm0
-; SSE41-NEXT: packssdw %xmm1, %xmm0
-; SSE41-NEXT: packssdw %xmm2, %xmm0
-; SSE41-NEXT: packsswb %xmm0, %xmm0
-; SSE41-NEXT: pmovmskb %xmm0, %eax
-; SSE41-NEXT: cmpb $-1, %al
+; SSE41-NEXT: por %xmm3, %xmm1
+; SSE41-NEXT: por %xmm2, %xmm0
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: ptest %xmm0, %xmm0
; SSE41-NEXT: setne %al
; SSE41-NEXT: retq
;
; AVX1-LABEL: allzeros_v8i64_not:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vmovmskps %xmm0, %eax
-; AVX1-NEXT: cmpl $15, %eax
+; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vptest %ymm0, %ymm0
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: allzeros_v8i64_not:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
-; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovmskps %ymm0, %eax
-; AVX2-NEXT: cmpb $-1, %al
+; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vptest %ymm0, %ymm0
; AVX2-NEXT: setne %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; KNL-LABEL: allzeros_v8i64_not:
-; KNL: # %bb.0:
-; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: cmpb $-1, %al
-; KNL-NEXT: setne %al
-; KNL-NEXT: vzeroupper
-; KNL-NEXT: retq
-;
-; SKX-LABEL: allzeros_v8i64_not:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k0
-; SKX-NEXT: kortestb %k0, %k0
-; SKX-NEXT: setae %al
-; SKX-NEXT: vzeroupper
-; SKX-NEXT: retq
+; AVX512-LABEL: allzeros_v8i64_not:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0
+; AVX512-NEXT: kortestw %k0, %k0
+; AVX512-NEXT: setne %al
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%1 = icmp eq <8 x i64> %a0, zeroinitializer
%2 = bitcast <8 x i1> %1 to i8
%3 = icmp ne i8 %2, -1
diff --git a/llvm/test/CodeGen/X86/vector-compare-all_of.ll b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
index 6d8d68d267537..55bf6da607380 100644
--- a/llvm/test/CodeGen/X86/vector-compare-all_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
@@ -1427,9 +1427,9 @@ define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) {
;
; AVX512-LABEL: bool_reduction_v16i16:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
-; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: setb %al
+; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vptest %ymm0, %ymm0
+; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%a = icmp eq <16 x i16> %x, %y
@@ -1487,9 +1487,9 @@ define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) {
;
; AVX512-LABEL: bool_reduction_v32i8:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpeqb %ymm1, %ymm0, %k0
-; AVX512-NEXT: kortestd %k0, %k0
-; AVX512-NEXT: setb %al
+; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vptest %ymm0, %ymm0
+; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%a = icmp eq <32 x i8> %x, %y
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index 572a10e6d1775..b930d97488ece 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -963,19 +963,13 @@ define i1 @icmp0_v8i16_v8i1(<8 x i16>) {
;
; AVX512BW-LABEL: icmp0_v8i16_v8i1:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: cmpb $-1, %al
+; AVX512BW-NEXT: vptest %xmm0, %xmm0
; AVX512BW-NEXT: sete %al
-; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp0_v8i16_v8i1:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vptestnmw %xmm0, %xmm0, %k0
-; AVX512VL-NEXT: kmovd %k0, %eax
-; AVX512VL-NEXT: cmpb $-1, %al
+; AVX512VL-NEXT: vptest %xmm0, %xmm0
; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: retq
%a = icmp eq <8 x i16> %0, zeroinitializer
@@ -999,33 +993,11 @@ define i1 @icmp0_v16i8_v16i1(<16 x i8>) {
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
-; AVX1OR2-LABEL: icmp0_v16i8_v16i1:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vptest %xmm0, %xmm0
-; AVX1OR2-NEXT: sete %al
-; AVX1OR2-NEXT: retq
-;
-; AVX512F-LABEL: icmp0_v16i8_v16i1:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vptest %xmm0, %xmm0
-; AVX512F-NEXT: sete %al
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: icmp0_v16i8_v16i1:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
-; AVX512BW-NEXT: kortestw %k0, %k0
-; AVX512BW-NEXT: setb %al
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: icmp0_v16i8_v16i1:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vptestnmb %xmm0, %xmm0, %k0
-; AVX512VL-NEXT: kortestw %k0, %k0
-; AVX512VL-NEXT: setb %al
-; AVX512VL-NEXT: retq
+; AVX-LABEL: icmp0_v16i8_v16i1:
+; AVX: # %bb.0:
+; AVX-NEXT: vptest %xmm0, %xmm0
+; AVX-NEXT: sete %al
+; AVX-NEXT: retq
%a = icmp eq <16 x i8> %0, zeroinitializer
%b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
ret i1 %b
@@ -1107,41 +1079,12 @@ define i1 @icmp0_v8i32_v8i1(<8 x i32>) {
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
-; AVX1OR2-LABEL: icmp0_v8i32_v8i1:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vptest %ymm0, %ymm0
-; AVX1OR2-NEXT: sete %al
-; AVX1OR2-NEXT: vzeroupper
-; AVX1OR2-NEXT: retq
-;
-; AVX512F-LABEL: icmp0_v8i32_v8i1:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: cmpb $-1, %al
-; AVX512F-NEXT: sete %al
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: icmp0_v8i32_v8i1:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vptestnmd %zmm0, %zmm0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: cmpb $-1, %al
-; AVX512BW-NEXT: sete %al
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: icmp0_v8i32_v8i1:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k0
-; AVX512VL-NEXT: kmovd %k0, %eax
-; AVX512VL-NEXT: cmpb $-1, %al
-; AVX512VL-NEXT: sete %al
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
+; AVX-LABEL: icmp0_v8i32_v8i1:
+; AVX: # %bb.0:
+; AVX-NEXT: vptest %ymm0, %ymm0
+; AVX-NEXT: sete %al
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
%a = icmp eq <8 x i32> %0, zeroinitializer
%b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
ret i1 %b
@@ -1185,18 +1128,15 @@ define i1 @icmp0_v16i16_v16i1(<16 x i16>) {
;
; AVX512BW-LABEL: icmp0_v16i16_v16i1:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0
-; AVX512BW-NEXT: kortestw %k0, %k0
-; AVX512BW-NEXT: setb %al
+; AVX512BW-NEXT: vptest %ymm0, %ymm0
+; AVX512BW-NEXT: sete %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp0_v16i16_v16i1:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vptestnmw %ymm0, %ymm0, %k0
-; AVX512VL-NEXT: kortestw %k0, %k0
-; AVX512VL-NEXT: setb %al
+; AVX512VL-NEXT: vptest %ymm0, %ymm0
+; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
%a = icmp eq <16 x i16> %0, zeroinitializer
@@ -1252,18 +1192,15 @@ define i1 @icmp0_v32i8_v32i1(<32 x i8>) {
;
; AVX512BW-LABEL: icmp0_v32i8_v32i1:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
-; AVX512BW-NEXT: kortestd %k0, %k0
-; AVX512BW-NEXT: setb %al
+; AVX512BW-NEXT: vptest %ymm0, %ymm0
+; AVX512BW-NEXT: sete %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp0_v32i8_v32i1:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vptestnmb %ymm0, %ymm0, %k0
-; AVX512VL-NEXT: kortestd %k0, %k0
-; AVX512VL-NEXT: setb %al
+; AVX512VL-NEXT: vptest %ymm0, %ymm0
+; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
%a = icmp eq <32 x i8> %0, zeroinitializer
@@ -1309,32 +1246,13 @@ define i1 @icmp0_v8i64_v8i1(<8 x i64>) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: icmp0_v8i64_v8i1:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: cmpb $-1, %al
-; AVX512F-NEXT: sete %al
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: icmp0_v8i64_v8i1:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vptestnmq %zmm0, %zmm0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: cmpb $-1, %al
-; AVX512BW-NEXT: sete %al
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: icmp0_v8i64_v8i1:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vptestnmq %zmm0, %zmm0, %k0
-; AVX512VL-NEXT: kmovd %k0, %eax
-; AVX512VL-NEXT: cmpb $-1, %al
-; AVX512VL-NEXT: sete %al
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: icmp0_v8i64_v8i1:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0
+; AVX512-NEXT: kortestw %k0, %k0
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%a = icmp eq <8 x i64> %0, zeroinitializer
%b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
ret i1 %b
@@ -1380,9 +1298,9 @@ define i1 @icmp0_v16i32_v16i1(<16 x i32>) {
;
; AVX512-LABEL: icmp0_v16i32_v16i1:
; AVX512: # %bb.0:
-; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0
+; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: setb %al
+; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%a = icmp eq <16 x i32> %0, zeroinitializer
@@ -1452,17 +1370,17 @@ define i1 @icmp0_v32i16_v32i1(<32 x i16>) {
;
; AVX512BW-LABEL: icmp0_v32i16_v32i1:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vptestnmw %zmm0, %zmm0, %k0
-; AVX512BW-NEXT: kortestd %k0, %k0
-; AVX512BW-NEXT: setb %al
+; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
+; AVX512BW-NEXT: kortestw %k0, %k0
+; AVX512BW-NEXT: sete %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp0_v32i16_v32i1:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vptestnmw %zmm0, %zmm0, %k0
-; AVX512VL-NEXT: kortestd %k0, %k0
-; AVX512VL-NEXT: setb %al
+; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k0
+; AVX512VL-NEXT: kortestw %k0, %k0
+; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
%a = icmp eq <32 x i16> %0, zeroinitializer
@@ -1537,17 +1455,17 @@ define i1 @icmp0_v64i8_v64i1(<64 x i8>) {
;
; AVX512BW-LABEL: icmp0_v64i8_v64i1:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vptestnmb %zmm0, %zmm0, %k0
-; AVX512BW-NEXT: kortestq %k0, %k0
-; AVX512BW-NEXT: setb %al
+; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
+; AVX512BW-NEXT: kortestw %k0, %k0
+; AVX512BW-NEXT: sete %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp0_v64i8_v64i1:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vptestnmb %zmm0, %zmm0, %k0
-; AVX512VL-NEXT: kortestq %k0, %k0
-; AVX512VL-NEXT: setb %al
+; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k0
+; AVX512VL-NEXT: kortestw %k0, %k0
+; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
%a = icmp eq <64 x i8> %0, zeroinitializer
@@ -1786,21 +1704,19 @@ define i1 @icmp1_v8i16_v8i1(<8 x i16>) {
; AVX512BW-LABEL: icmp1_v8i16_v8i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: cmpb $-1, %al
-; AVX512BW-NEXT: sete %al
+; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm1
+; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm1
+; AVX512BW-NEXT: vptest %xmm1, %xmm0
+; AVX512BW-NEXT: setb %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp1_v8i16_v8i1:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
-; AVX512VL-NEXT: kmovd %k0, %eax
-; AVX512VL-NEXT: cmpb $-1, %al
-; AVX512VL-NEXT: sete %al
+; AVX512VL-NEXT: vmovdqa %xmm0, %xmm1
+; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm1
+; AVX512VL-NEXT: vptest %xmm1, %xmm0
+; AVX512VL-NEXT: setb %al
; AVX512VL-NEXT: retq
%a = icmp eq <8 x i16> %0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
%b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
@@ -1844,18 +1760,18 @@ define i1 @icmp1_v16i8_v16i1(<16 x i8>) {
; AVX512BW-LABEL: icmp1_v16i8_v16i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
-; AVX512BW-NEXT: kortestw %k0, %k0
+; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm1
+; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm1
+; AVX512BW-NEXT: vptest %xmm1, %xmm0
; AVX512BW-NEXT: setb %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp1_v16i8_v16i1:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
-; AVX512VL-NEXT: kortestw %k0, %k0
+; AVX512VL-NEXT: vmovdqa %xmm0, %xmm1
+; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm1
+; AVX512VL-NEXT: vptest %xmm1, %xmm0
; AVX512VL-NEXT: setb %al
; AVX512VL-NEXT: retq
%a = icmp eq <16 x i8> %0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -1984,32 +1900,29 @@ define i1 @icmp1_v8i32_v8i1(<8 x i32>) {
; AVX512F-LABEL: icmp1_v8i32_v8i1:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; AVX512F-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: cmpb $-1, %al
-; AVX512F-NEXT: sete %al
+; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1
+; AVX512F-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm1
+; AVX512F-NEXT: vptest %ymm1, %ymm0
+; AVX512F-NEXT: setb %al
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: icmp1_v8i32_v8i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: cmpb $-1, %al
-; AVX512BW-NEXT: sete %al
+; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm1
+; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm1
+; AVX512BW-NEXT: vptest %ymm1, %ymm0
+; AVX512BW-NEXT: setb %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp1_v8i32_v8i1:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; AVX512VL-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
-; AVX512VL-NEXT: kmovd %k0, %eax
-; AVX512VL-NEXT: cmpb $-1, %al
-; AVX512VL-NEXT: sete %al
+; AVX512VL-NEXT: vmovdqa %ymm0, %ymm1
+; AVX512VL-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm1
+; AVX512VL-NEXT: vptest %ymm1, %ymm0
+; AVX512VL-NEXT: setb %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
%a = icmp eq <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
@@ -2072,18 +1985,18 @@ define i1 @icmp1_v16i16_v16i1(<16 x i16>) {
; AVX512BW-LABEL: icmp1_v16i16_v16i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
-; AVX512BW-NEXT: kortestw %k0, %k0
+; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm1
+; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm1
+; AVX512BW-NEXT: vptest %ymm1, %ymm0
; AVX512BW-NEXT: setb %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp1_v16i16_v16i1:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; AVX512VL-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
-; AVX512VL-NEXT: kortestw %k0, %k0
+; AVX512VL-NEXT: vmovdqa %ymm0, %ymm1
+; AVX512VL-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm1
+; AVX512VL-NEXT: vptest %ymm1, %ymm0
; AVX512VL-NEXT: setb %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@@ -2157,18 +2070,18 @@ define i1 @icmp1_v32i8_v32i1(<32 x i8>) {
; AVX512BW-LABEL: icmp1_v32i8_v32i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
-; AVX512BW-NEXT: kortestd %k0, %k0
+; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm1
+; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm1
+; AVX512BW-NEXT: vptest %ymm1, %ymm0
; AVX512BW-NEXT: setb %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp1_v32i8_v32i1:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; AVX512VL-NEXT: vpcmpeqb %ymm1, %ymm0, %k0
-; AVX512VL-NEXT: kortestd %k0, %k0
+; AVX512VL-NEXT: vmovdqa %ymm0, %ymm1
+; AVX512VL-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm1
+; AVX512VL-NEXT: vptest %ymm1, %ymm0
; AVX512VL-NEXT: setb %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@@ -2224,35 +2137,14 @@ define i1 @icmp1_v8i64_v8i1(<8 x i64>) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: icmp1_v8i64_v8i1:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
-; AVX512F-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: cmpb $-1, %al
-; AVX512F-NEXT: sete %al
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: icmp1_v8i64_v8i1:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
-; AVX512BW-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: cmpb $-1, %al
-; AVX512BW-NEXT: sete %al
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: icmp1_v8i64_v8i1:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
-; AVX512VL-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
-; AVX512VL-NEXT: kmovd %k0, %eax
-; AVX512VL-NEXT: cmpb $-1, %al
-; AVX512VL-NEXT: sete %al
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: icmp1_v8i64_v8i1:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
+; AVX512-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
+; AVX512-NEXT: kortestw %k0, %k0
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%a = icmp eq <8 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
%b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
ret i1 %b
@@ -2308,9 +2200,9 @@ define i1 @icmp1_v16i32_v16i1(<16 x i32>) {
; AVX512-LABEL: icmp1_v16i32_v16i1:
; AVX512: # %bb.0:
; AVX512-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
-; AVX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: setb %al
+; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%a = icmp eq <16 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
@@ -2390,18 +2282,18 @@ define i1 @icmp1_v32i16_v32i1(<32 x i16>) {
; AVX512BW-LABEL: icmp1_v32i16_v32i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
-; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
-; AVX512BW-NEXT: kortestd %k0, %k0
-; AVX512BW-NEXT: setb %al
+; AVX512BW-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kortestw %k0, %k0
+; AVX512BW-NEXT: sete %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp1_v32i16_v32i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
-; AVX512VL-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
-; AVX512VL-NEXT: kortestd %k0, %k0
-; AVX512VL-NEXT: setb %al
+; AVX512VL-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
+; AVX512VL-NEXT: kortestw %k0, %k0
+; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
%a = icmp eq <32 x i16> %0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
@@ -2486,18 +2378,18 @@ define i1 @icmp1_v64i8_v64i1(<64 x i8>) {
; AVX512BW-LABEL: icmp1_v64i8_v64i1:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
-; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
-; AVX512BW-NEXT: kortestq %k0, %k0
-; AVX512BW-NEXT: setb %al
+; AVX512BW-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kortestw %k0, %k0
+; AVX512BW-NEXT: sete %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp1_v64i8_v64i1:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
-; AVX512VL-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
-; AVX512VL-NEXT: kortestq %k0, %k0
-; AVX512VL-NEXT: setb %al
+; AVX512VL-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
+; AVX512VL-NEXT: kortestw %k0, %k0
+; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
%a = icmp eq <64 x i8> %0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -2726,20 +2618,15 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>, <8 x i16>) {
;
; AVX512BW-LABEL: icmp_v8i16_v8i1:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: cmpb $-1, %al
+; AVX512BW-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vptest %xmm0, %xmm0
; AVX512BW-NEXT: sete %al
-; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp_v8i16_v8i1:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
-; AVX512VL-NEXT: kmovd %k0, %eax
-; AVX512VL-NEXT: cmpb $-1, %al
+; AVX512VL-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vptest %xmm0, %xmm0
; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: retq
%a = icmp eq <8 x i16> %0, %1
@@ -2779,19 +2666,16 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>, <16 x i8>) {
;
; AVX512BW-LABEL: icmp_v16i8_v16i1:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
-; AVX512BW-NEXT: kortestw %k0, %k0
-; AVX512BW-NEXT: setb %al
-; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vptest %xmm0, %xmm0
+; AVX512BW-NEXT: sete %al
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp_v16i8_v16i1:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
-; AVX512VL-NEXT: kortestw %k0, %k0
-; AVX512VL-NEXT: setb %al
+; AVX512VL-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vptest %xmm0, %xmm0
+; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: retq
%a = icmp eq <16 x i8> %0, %1
%b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
@@ -2913,36 +2797,13 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>, <8 x i32>) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: icmp_v8i32_v8i1:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: cmpb $-1, %al
-; AVX512F-NEXT: sete %al
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: icmp_v8i32_v8i1:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: cmpb $-1, %al
-; AVX512BW-NEXT: sete %al
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: icmp_v8i32_v8i1:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
-; AVX512VL-NEXT: kmovd %k0, %eax
-; AVX512VL-NEXT: cmpb $-1, %al
-; AVX512VL-NEXT: sete %al
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: icmp_v8i32_v8i1:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vptest %ymm0, %ymm0
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%a = icmp eq <8 x i32> %0, %1
%b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
ret i1 %b
@@ -3000,19 +2861,17 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>, <16 x i16>) {
;
; AVX512BW-LABEL: icmp_v16i16_v16i1:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
-; AVX512BW-NEXT: kortestw %k0, %k0
-; AVX512BW-NEXT: setb %al
+; AVX512BW-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vptest %ymm0, %ymm0
+; AVX512BW-NEXT: sete %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp_v16i16_v16i1:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
-; AVX512VL-NEXT: kortestw %k0, %k0
-; AVX512VL-NEXT: setb %al
+; AVX512VL-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vptest %ymm0, %ymm0
+; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
%a = icmp eq <16 x i16> %0, %1
@@ -3082,19 +2941,17 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>, <32 x i8>) {
;
; AVX512BW-LABEL: icmp_v32i8_v32i1:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
-; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
-; AVX512BW-NEXT: kortestd %k0, %k0
-; AVX512BW-NEXT: setb %al
+; AVX512BW-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vptest %ymm0, %ymm0
+; AVX512BW-NEXT: sete %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp_v32i8_v32i1:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpcmpeqb %ymm1, %ymm0, %k0
-; AVX512VL-NEXT: kortestd %k0, %k0
-; AVX512VL-NEXT: setb %al
+; AVX512VL-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT: vptest %ymm0, %ymm0
+; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
%a = icmp eq <32 x i8> %0, %1
@@ -3146,32 +3003,13 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>, <8 x i64>) {
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: icmp_v8i64_v8i1:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, %eax
-; AVX512F-NEXT: cmpb $-1, %al
-; AVX512F-NEXT: sete %al
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: icmp_v8i64_v8i1:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: cmpb $-1, %al
-; AVX512BW-NEXT: sete %al
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512VL-LABEL: icmp_v8i64_v8i1:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
-; AVX512VL-NEXT: kmovd %k0, %eax
-; AVX512VL-NEXT: cmpb $-1, %al
-; AVX512VL-NEXT: sete %al
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: icmp_v8i64_v8i1:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
+; AVX512-NEXT: kortestw %k0, %k0
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%a = icmp eq <8 x i64> %0, %1
%b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
ret i1 %b
@@ -3223,9 +3061,9 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>, <16 x i32>) {
;
; AVX512-LABEL: icmp_v16i32_v16i1:
; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
; AVX512-NEXT: kortestw %k0, %k0
-; AVX512-NEXT: setb %al
+; AVX512-NEXT: sete %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%a = icmp eq <16 x i32> %0, %1
@@ -3301,17 +3139,17 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>, <32 x i16>) {
;
; AVX512BW-LABEL: icmp_v32i16_v32i1:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
-; AVX512BW-NEXT: kortestd %k0, %k0
-; AVX512BW-NEXT: setb %al
+; AVX512BW-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kortestw %k0, %k0
+; AVX512BW-NEXT: sete %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp_v32i16_v32i1:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
-; AVX512VL-NEXT: kortestd %k0, %k0
-; AVX512VL-NEXT: setb %al
+; AVX512VL-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
+; AVX512VL-NEXT: kortestw %k0, %k0
+; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
%a = icmp eq <32 x i16> %0, %1
@@ -3392,17 +3230,17 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>, <64 x i8>) {
;
; AVX512BW-LABEL: icmp_v64i8_v64i1:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
-; AVX512BW-NEXT: kortestq %k0, %k0
-; AVX512BW-NEXT: setb %al
+; AVX512BW-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kortestw %k0, %k0
+; AVX512BW-NEXT: sete %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: icmp_v64i8_v64i1:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
-; AVX512VL-NEXT: kortestq %k0, %k0
-; AVX512VL-NEXT: setb %al
+; AVX512VL-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
+; AVX512VL-NEXT: kortestw %k0, %k0
+; AVX512VL-NEXT: sete %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
%a = icmp eq <64 x i8> %0, %1
More information about the llvm-commits mailing list