[llvm] [X86] combinePTESTCC - fold PTESTC(PCMPEQ(X,0),-1) == PTESTZ(X,X) (PR #123466)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Jan 18 08:21:53 PST 2025
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/123466
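Simplifies the hidden "all_of(X == 0)" pattern: with an all-ones second operand, PTESTC sets CF only when PCMPEQ(X,0) is all-ones (i.e. every element of X is zero), which is exactly the condition under which PTESTZ(X,X) sets ZF.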
Fixes #123456
>From c5941fe6d2a678b8863838629a9f69b5171ba4c4 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Sat, 18 Jan 2025 16:15:57 +0000
Subject: [PATCH] [X86] combinePTESTCC - fold PTESTC(PCMPEQ(X,0),-1) ==
PTESTZ(X,X)
Simplifies the hidden "all_of(X == 0)" pattern
Fixes #123456
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 12 ++++++++
llvm/test/CodeGen/X86/combine-ptest.ll | 40 ++++++-------------------
2 files changed, 21 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 410b08912a5e24..33ddcb57e9b08b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48054,6 +48054,18 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
DAG.getAllOnesConstant(DL, NotOp1.getValueType())));
}
}
+ // PTESTC(PCMPEQ(X,0),-1) == PTESTZ(X,X)
+ if (EFLAGS.getOpcode() == X86ISD::PTEST &&
+ ISD::isBuildVectorAllOnes(Op1.getNode())) {
+ SDValue BC0 = peekThroughBitcasts(Op0);
+ if (BC0.getOpcode() == X86ISD::PCMPEQ &&
+ ISD::isBuildVectorAllZeros(BC0.getOperand(1).getNode())) {
+ SDLoc DL(EFLAGS);
+ CC = (CC == X86::COND_B ? X86::COND_E : X86::COND_NE);
+ SDValue X = DAG.getBitcast(OpVT, BC0.getOperand(0));
+ return DAG.getNode(EFLAGS.getOpcode(), DL, VT, X, X);
+ }
+ }
}
if (CC == X86::COND_E || CC == X86::COND_NE) {
diff --git a/llvm/test/CodeGen/X86/combine-ptest.ll b/llvm/test/CodeGen/X86/combine-ptest.ll
index f5ab700caea467..fda14027e994e3 100644
--- a/llvm/test/CodeGen/X86/combine-ptest.ll
+++ b/llvm/test/CodeGen/X86/combine-ptest.ll
@@ -376,20 +376,14 @@ define i32 @ptestz_v2i64_concat(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
define i1 @ptestc_v4i32_eq0(<4 x i32> %a0) {
; SSE-LABEL: ptestc_v4i32_eq0:
; SSE: # %bb.0:
-; SSE-NEXT: pxor %xmm1, %xmm1
-; SSE-NEXT: pcmpeqd %xmm0, %xmm1
-; SSE-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE-NEXT: ptest %xmm0, %xmm1
-; SSE-NEXT: setb %al
+; SSE-NEXT: ptest %xmm0, %xmm0
+; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
; AVX-LABEL: ptestc_v4i32_eq0:
; AVX: # %bb.0:
-; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vptest %xmm1, %xmm0
-; AVX-NEXT: setb %al
+; AVX-NEXT: vptest %xmm0, %xmm0
+; AVX-NEXT: sete %al
; AVX-NEXT: retq
%icmp = icmp eq <4 x i32> %a0, zeroinitializer
%sext = sext <4 x i1> %icmp to <4 x i32>
@@ -403,22 +397,14 @@ define i1 @ptestc_v4i32_eq0(<4 x i32> %a0) {
define i1 @ptestc_v4i32_and_eq0(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: ptestc_v4i32_and_eq0:
; SSE: # %bb.0:
-; SSE-NEXT: pand %xmm1, %xmm0
-; SSE-NEXT: pxor %xmm1, %xmm1
-; SSE-NEXT: pcmpeqd %xmm0, %xmm1
-; SSE-NEXT: pcmpeqd %xmm0, %xmm0
; SSE-NEXT: ptest %xmm0, %xmm1
-; SSE-NEXT: setb %al
+; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
; AVX-LABEL: ptestc_v4i32_and_eq0:
; AVX: # %bb.0:
-; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vptest %xmm1, %xmm0
-; AVX-NEXT: setb %al
+; AVX-NEXT: vptest %xmm0, %xmm1
+; AVX-NEXT: sete %al
; AVX-NEXT: retq
%and = and <4 x i32> %a1, %a0
%icmp = icmp eq <4 x i32> %and, zeroinitializer
@@ -433,21 +419,13 @@ define i1 @ptestc_v4i32_and_eq0(<4 x i32> %a0, <4 x i32> %a1) {
define i1 @ptestc_v4i32_andnot_eq0(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: ptestc_v4i32_andnot_eq0:
; SSE: # %bb.0:
-; SSE-NEXT: pandn %xmm0, %xmm1
-; SSE-NEXT: pxor %xmm0, %xmm0
-; SSE-NEXT: pcmpeqd %xmm1, %xmm0
-; SSE-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE-NEXT: ptest %xmm1, %xmm0
+; SSE-NEXT: ptest %xmm0, %xmm1
; SSE-NEXT: setae %al
; SSE-NEXT: retq
;
; AVX-LABEL: ptestc_v4i32_andnot_eq0:
; AVX: # %bb.0:
-; AVX-NEXT: vpandn %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vptest %xmm1, %xmm0
+; AVX-NEXT: vptest %xmm0, %xmm1
; AVX-NEXT: setae %al
; AVX-NEXT: retq
%not = xor <4 x i32> %a1, splat (i32 -1)
More information about the llvm-commits mailing list.