[llvm] [X86] combinePTESTCC - fold PTESTC(PCMPEQ(X,0),-1) == PTESTZ(X,X) (PR #123466)

via llvm-commits llvm-commits at lists.llvm.org
Sat Jan 18 08:22:28 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

<details>
<summary>Changes</summary>

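Simplifies the hidden "all_of(X == 0)" pattern: PTESTC(PCMPEQ(X,0),-1) sets the carry flag only when every element of X is zero, which PTESTZ(X,X) tests directly via the zero flag.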

Fixes #<!-- -->123456

---
Full diff: https://github.com/llvm/llvm-project/pull/123466.diff


2 Files Affected:

- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+12) 
- (modified) llvm/test/CodeGen/X86/combine-ptest.ll (+9-31) 


``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 410b08912a5e24..33ddcb57e9b08b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48054,6 +48054,18 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
                            DAG.getAllOnesConstant(DL, NotOp1.getValueType())));
       }
     }
+    // PTESTC(PCMPEQ(X,0),-1) == PTESTZ(X,X)
+    if (EFLAGS.getOpcode() == X86ISD::PTEST &&
+        ISD::isBuildVectorAllOnes(Op1.getNode())) {
+      SDValue BC0 = peekThroughBitcasts(Op0);
+      if (BC0.getOpcode() == X86ISD::PCMPEQ &&
+          ISD::isBuildVectorAllZeros(BC0.getOperand(1).getNode())) {
+        SDLoc DL(EFLAGS);
+        CC = (CC == X86::COND_B ? X86::COND_E : X86::COND_NE);
+        SDValue X = DAG.getBitcast(OpVT, BC0.getOperand(0));
+        return DAG.getNode(EFLAGS.getOpcode(), DL, VT, X, X);
+      }
+    }
   }
 
   if (CC == X86::COND_E || CC == X86::COND_NE) {
diff --git a/llvm/test/CodeGen/X86/combine-ptest.ll b/llvm/test/CodeGen/X86/combine-ptest.ll
index f5ab700caea467..fda14027e994e3 100644
--- a/llvm/test/CodeGen/X86/combine-ptest.ll
+++ b/llvm/test/CodeGen/X86/combine-ptest.ll
@@ -376,20 +376,14 @@ define i32 @ptestz_v2i64_concat(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
 define i1 @ptestc_v4i32_eq0(<4 x i32> %a0) {
 ; SSE-LABEL: ptestc_v4i32_eq0:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pxor %xmm1, %xmm1
-; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
-; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSE-NEXT:    ptest %xmm0, %xmm1
-; SSE-NEXT:    setb %al
+; SSE-NEXT:    ptest %xmm0, %xmm0
+; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: ptestc_v4i32_eq0:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vptest %xmm1, %xmm0
-; AVX-NEXT:    setb %al
+; AVX-NEXT:    vptest %xmm0, %xmm0
+; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
   %icmp = icmp eq <4 x i32> %a0, zeroinitializer
   %sext = sext <4 x i1> %icmp to <4 x i32>
@@ -403,22 +397,14 @@ define i1 @ptestc_v4i32_eq0(<4 x i32> %a0) {
 define i1 @ptestc_v4i32_and_eq0(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-LABEL: ptestc_v4i32_and_eq0:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pand %xmm1, %xmm0
-; SSE-NEXT:    pxor %xmm1, %xmm1
-; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
-; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
 ; SSE-NEXT:    ptest %xmm0, %xmm1
-; SSE-NEXT:    setb %al
+; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: ptestc_v4i32_and_eq0:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpand %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vptest %xmm1, %xmm0
-; AVX-NEXT:    setb %al
+; AVX-NEXT:    vptest %xmm0, %xmm1
+; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
   %and = and <4 x i32> %a1, %a0
   %icmp = icmp eq <4 x i32> %and, zeroinitializer
@@ -433,21 +419,13 @@ define i1 @ptestc_v4i32_and_eq0(<4 x i32> %a0, <4 x i32> %a1) {
 define i1 @ptestc_v4i32_andnot_eq0(<4 x i32> %a0, <4 x i32> %a1) {
 ; SSE-LABEL: ptestc_v4i32_andnot_eq0:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pandn %xmm0, %xmm1
-; SSE-NEXT:    pxor %xmm0, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
-; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE-NEXT:    ptest %xmm1, %xmm0
+; SSE-NEXT:    ptest %xmm0, %xmm1
 ; SSE-NEXT:    setae %al
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: ptestc_v4i32_andnot_eq0:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpandn %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vptest %xmm1, %xmm0
+; AVX-NEXT:    vptest %xmm0, %xmm1
 ; AVX-NEXT:    setae %al
 ; AVX-NEXT:    retq
   %not = xor <4 x i32> %a1, splat (i32 -1)

``````````

</details>


https://github.com/llvm/llvm-project/pull/123466


More information about the llvm-commits mailing list