[llvm] 389ae77 - [X86] Fold TESTZ(OR(LO(X), HI(X)), OR(LO(Y), HI(Y))) -> TESTZ(X,Y)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 27 05:22:31 PST 2022
Author: Simon Pilgrim
Date: 2022-01-27T13:20:36Z
New Revision: 389ae775e44ea0c8cd42a3ae518e79b837af4990
URL: https://github.com/llvm/llvm-project/commit/389ae775e44ea0c8cd42a3ae518e79b837af4990
DIFF: https://github.com/llvm/llvm-project/commit/389ae775e44ea0c8cd42a3ae518e79b837af4990.diff
LOG: [X86] Fold TESTZ(OR(LO(X),HI(X)),OR(LO(Y),HI(Y))) -> TESTZ(X,Y)
Helps fix a number of poor codegen cases for allof(cmp()) patterns with 256-bit vectors on AVX1.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/combine-ptest.ll
llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ba606d7a80edb..240be748a89ec 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44407,6 +44407,23 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
// TESTZ(X,-1) == TESTZ(X,X)
if (ISD::isBuildVectorAllOnes(Op1.getNode()))
return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op0, Op0);
+
+ // TESTZ(OR(LO(X),HI(X)),OR(LO(Y),HI(Y))) -> TESTZ(X,Y)
+ // TODO: Add COND_NE handling?
+ if (CC == X86::COND_E && OpVT.is128BitVector() && Subtarget.hasAVX()) {
+ SDValue Src0 = peekThroughBitcasts(Op0);
+ SDValue Src1 = peekThroughBitcasts(Op1);
+ if (Src0.getOpcode() == ISD::OR && Src1.getOpcode() == ISD::OR) {
+ Src0 = getSplitVectorSrc(peekThroughBitcasts(Src0.getOperand(0)),
+ peekThroughBitcasts(Src0.getOperand(1)), true);
+ Src1 = getSplitVectorSrc(peekThroughBitcasts(Src1.getOperand(0)),
+ peekThroughBitcasts(Src1.getOperand(1)), true);
+ if (Src0 && Src1)
+ return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
+ DAG.getBitcast(MVT::v4i64, Src0),
+ DAG.getBitcast(MVT::v4i64, Src1));
+ }
+ }
}
return SDValue();
diff --git a/llvm/test/CodeGen/X86/combine-ptest.ll b/llvm/test/CodeGen/X86/combine-ptest.ll
index 5781ae6e70a7f..33691308fd938 100644
--- a/llvm/test/CodeGen/X86/combine-ptest.ll
+++ b/llvm/test/CodeGen/X86/combine-ptest.ll
@@ -397,29 +397,13 @@ define i32 @ptestz_v32i8_signbits(<32 x i8> %c, i32 %a, i32 %b) {
;
define i32 @ptestz_v2i64_concat(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
-; AVX1-LABEL: ptestz_v2i64_concat:
-; AVX1: # %bb.0:
-; AVX1-NEXT: movl %edi, %eax
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpor %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vptest %xmm1, %xmm0
-; AVX1-NEXT: cmovnel %esi, %eax
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: ptestz_v2i64_concat:
-; AVX2: # %bb.0:
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
-; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm1, %xmm3, %xmm1
-; AVX2-NEXT: vptest %xmm1, %xmm0
-; AVX2-NEXT: cmovnel %esi, %eax
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
+; CHECK-LABEL: ptestz_v2i64_concat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: vptest %ymm1, %ymm0
+; CHECK-NEXT: cmovnel %esi, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
%t1 = shufflevector <4 x i64> %c, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
%t2 = shufflevector <4 x i64> %c, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
%t3 = shufflevector <4 x i64> %d, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index 4be3ed18cf901..c94ca6ca5deb3 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -1202,21 +1202,12 @@ define i1 @icmp0_v16i16_v16i1(<16 x i16>) {
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
-; AVX1-LABEL: icmp0_v16i16_v16i1:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vptest %xmm0, %xmm0
-; AVX1-NEXT: sete %al
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: icmp0_v16i16_v16i1:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vptest %ymm0, %ymm0
-; AVX2-NEXT: sete %al
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
+; AVX-LABEL: icmp0_v16i16_v16i1:
+; AVX: # %bb.0:
+; AVX-NEXT: vptest %ymm0, %ymm0
+; AVX-NEXT: sete %al
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
;
; AVX512F-LABEL: icmp0_v16i16_v16i1:
; AVX512F: # %bb.0:
@@ -1268,21 +1259,12 @@ define i1 @icmp0_v32i8_v32i1(<32 x i8>) {
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
-; AVX1-LABEL: icmp0_v32i8_v32i1:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vptest %xmm0, %xmm0
-; AVX1-NEXT: sete %al
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: icmp0_v32i8_v32i1:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vptest %ymm0, %ymm0
-; AVX2-NEXT: sete %al
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
+; AVX-LABEL: icmp0_v32i8_v32i1:
+; AVX: # %bb.0:
+; AVX-NEXT: vptest %ymm0, %ymm0
+; AVX-NEXT: sete %al
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
;
; AVX512F-LABEL: icmp0_v32i8_v32i1:
; AVX512F: # %bb.0:
@@ -1504,9 +1486,7 @@ define i1 @icmp0_v32i16_v32i1(<32 x i16>) {
; AVX1-LABEL: icmp0_v32i16_v32i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vptest %xmm0, %xmm0
+; AVX1-NEXT: vptest %ymm0, %ymm0
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1586,9 +1566,7 @@ define i1 @icmp0_v64i8_v64i1(<64 x i8>) {
; AVX1-LABEL: icmp0_v64i8_v64i1:
; AVX1: # %bb.0:
; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vptest %xmm0, %xmm0
+; AVX1-NEXT: vptest %ymm0, %ymm0
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
More information about the llvm-commits mailing list