[llvm] c143db3 - [X86][SSE] combineHorizontalPredicateResult - improve all_of(X == 0) for vXi64 on pre-SSE41 targets
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 19 03:43:40 PDT 2020
Author: Simon Pilgrim
Date: 2020-06-19T11:43:25+01:00
New Revision: c143db3b1032042193c152790bcefe34365e6d6c
URL: https://github.com/llvm/llvm-project/commit/c143db3b1032042193c152790bcefe34365e6d6c
DIFF: https://github.com/llvm/llvm-project/commit/c143db3b1032042193c152790bcefe34365e6d6c.diff
LOG: [X86][SSE] combineHorizontalPredicateResult - improve all_of(X == 0) for vXi64 on pre-SSE41 targets
Without SSE41 we don't have the PCMPEQQ instruction, which makes compare-with-zero reductions of vXi64 more complicated than necessary. Instead, we can perform the comparison as vXi32 (PCMPEQD) and adjust the MOVMSK comparison so that it tests both the upper and lower DWORD results of each 64-bit element.
This is a preparatory fix for a pattern that occurs with null tests on vectors of (64-bit) pointers, such as in PR35129.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index cd5842358e60..e2a9231065e0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -38504,6 +38504,25 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract,
EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
Movmsk = DAG.getBitcast(MovmskVT, Match);
} else {
+ // For all_of(setcc(vec,0,eq)) - avoid vXi64 comparisons if we don't have
+ // PCMPEQQ (SSE41+), use PCMPEQD instead.
+ if (BinOp == ISD::AND && !Subtarget.hasSSE41() &&
+ Match.getOpcode() == ISD::SETCC &&
+ ISD::isBuildVectorAllZeros(Match.getOperand(1).getNode()) &&
+ cast<CondCodeSDNode>(Match.getOperand(2))->get() ==
+ ISD::CondCode::SETEQ) {
+ SDValue Vec = Match.getOperand(0);
+ if (Vec.getValueType().getScalarType() == MVT::i64 &&
+ (2 * NumElts) <= MaxElts) {
+ NumElts *= 2;
+ EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
+ MatchVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
+ Match = DAG.getSetCC(
+ DL, MatchVT, DAG.getBitcast(CmpVT, Match.getOperand(0)),
+ DAG.getBitcast(CmpVT, Match.getOperand(1)), ISD::CondCode::SETEQ);
+ }
+ }
+
// Use combineBitcastvxi1 to create the MOVMSK.
while (NumElts > MaxElts) {
SDValue Lo, Hi;
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index ce6a4241b59a..4c34ee8aeb94 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -858,10 +858,8 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
-; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: movmskpd %xmm0, %eax
-; SSE2-NEXT: cmpb $3, %al
+; SSE2-NEXT: movmskps %xmm1, %eax
+; SSE2-NEXT: cmpb $15, %al
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
@@ -1088,14 +1086,11 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pcmpeqd %xmm2, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
-; SSE2-NEXT: pand %xmm1, %xmm3
; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: packssdw %xmm3, %xmm1
-; SSE2-NEXT: movmskps %xmm1, %eax
-; SSE2-NEXT: cmpb $15, %al
+; SSE2-NEXT: packssdw %xmm1, %xmm0
+; SSE2-NEXT: packsswb %xmm0, %xmm0
+; SSE2-NEXT: pmovmskb %xmm0, %eax
+; SSE2-NEXT: cmpb $-1, %al
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
@@ -1383,23 +1378,14 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
; SSE2: # %bb.0:
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,0,3,2]
-; SSE2-NEXT: pand %xmm3, %xmm5
; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
-; SSE2-NEXT: pand %xmm2, %xmm3
-; SSE2-NEXT: packssdw %xmm5, %xmm3
+; SSE2-NEXT: packssdw %xmm3, %xmm2
; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
-; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: packssdw %xmm2, %xmm1
-; SSE2-NEXT: packssdw %xmm3, %xmm1
-; SSE2-NEXT: packsswb %xmm1, %xmm1
-; SSE2-NEXT: pmovmskb %xmm1, %eax
-; SSE2-NEXT: cmpb $-1, %al
+; SSE2-NEXT: packssdw %xmm1, %xmm0
+; SSE2-NEXT: packsswb %xmm2, %xmm0
+; SSE2-NEXT: pmovmskb %xmm0, %eax
+; SSE2-NEXT: cmpw $-1, %ax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
More information about the llvm-commits
mailing list