[llvm] f2d03a2 - [X86] combinePTESTCC - only fold TESTZ(OR(LO(X),HI(X)),OR(LO(Y),HI(Y))) -> TESTZ(X,Y) for PTEST ops

Mon Apr 17 03:43:10 PDT 2023

Author: Simon Pilgrim
Date: 2023-04-17T11:43:00+01:00
New Revision: f2d03a2533c64068de3e85483aed583ab173b18a

URL: https://github.com/llvm/llvm-project/commit/f2d03a2533c64068de3e85483aed583ab173b18a
DIFF: https://github.com/llvm/llvm-project/commit/f2d03a2533c64068de3e85483aed583ab173b18a.diff

LOG: [X86] combinePTESTCC - only fold TESTZ(OR(LO(X),HI(X)),OR(LO(Y),HI(Y))) -> TESTZ(X,Y) for PTEST ops

This stops the value type mismatch assertions that were hit when the fold was applied to non-PTEST nodes; TESTPS/TESTPD support can be added to this fold in the future.

Fixes #62171

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/combine-testpd.ll
    llvm/test/CodeGen/X86/combine-testps.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d7bccd6e334c2..414725ef5307b 100644

--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47433,7 +47433,9 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
 
     // TESTZ(OR(LO(X),HI(X)),OR(LO(Y),HI(Y))) -> TESTZ(X,Y)
     // TODO: Add COND_NE handling?
-    if (CC == X86::COND_E && OpVT.is128BitVector() && Subtarget.hasAVX()) {
+    // TODO: Add TESTP handling
+    if (CC == X86::COND_E && OpVT.is128BitVector() && Subtarget.hasAVX() && 
+        EFLAGS.getOpcode() == X86ISD::PTEST) {
       SDValue Src0 = peekThroughBitcasts(Op0);
       SDValue Src1 = peekThroughBitcasts(Op1);
       if (Src0.getOpcode() == ISD::OR && Src1.getOpcode() == ISD::OR) {

diff  --git a/llvm/test/CodeGen/X86/combine-testpd.ll b/llvm/test/CodeGen/X86/combine-testpd.ll
index 842af121ee545..8652c591430b1 100644
--- a/llvm/test/CodeGen/X86/combine-testpd.ll
+++ b/llvm/test/CodeGen/X86/combine-testpd.ll
@@ -206,6 +206,29 @@ define i32 @testpdnzc_256_signbit_multiuse(<4 x double> %c, i32 %a, i32 %b) {
   ret i32 %t6
 }
 
+; FIXME: Foldable to vtestpd(vcmpeqpd(ymm0,ymm1),vcmpeqpd(ymm0,ymm1))
+define i1 @PR62171(<4 x double> %a0, <4 x double> %a1) {
+; CHECK-LABEL: PR62171:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT:    vorpd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vtestpd %xmm0, %xmm0
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cmp = fcmp oeq <4 x double> %a0, %a1
+  %sext = sext <4 x i1> %cmp to <4 x i64>
+  %extract = shufflevector <4 x i64> %sext, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
+  %extract1 = shufflevector <4 x i64> %sext, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
+  %or = or <2 x i64> %extract, %extract1
+  %or1 = bitcast <2 x i64> %or to <16 x i8>
+  %msk = icmp slt <16 x i8> %or1, zeroinitializer
+  %msk1 = bitcast <16 x i1> %msk to i16
+  %not = icmp eq i16 %msk1, 0
+  ret i1 %not
+}
+
 declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone
 declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone
 declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone

diff  --git a/llvm/test/CodeGen/X86/combine-testps.ll b/llvm/test/CodeGen/X86/combine-testps.ll
index b35ec391b29e1..43f99470cccd5 100644
--- a/llvm/test/CodeGen/X86/combine-testps.ll
+++ b/llvm/test/CodeGen/X86/combine-testps.ll
@@ -205,6 +205,29 @@ define i32 @testpsc_256_signbit_multiuse(<8 x float> %c, i32 %a, i32 %b) {
   ret i32 %t5
 }
 
+; FIXME: Foldable to vtestps(vcmpeqps(ymm0,ymm1),vcmpeqps(ymm0,ymm1))
+define i1 @PR62171(<8 x float> %a0, <8 x float> %a1) {
+; CHECK-LABEL: PR62171:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT:    vorps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vtestps %xmm0, %xmm0
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cmp = fcmp oeq <8 x float> %a0, %a1
+  %sext = sext <8 x i1> %cmp to <8 x i32>
+  %extract = shufflevector <8 x i32> %sext, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %extract1 = shufflevector <8 x i32> %sext, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %or = or <4 x i32> %extract, %extract1
+  %or1 = bitcast <4 x i32> %or to <16 x i8>
+  %msk = icmp slt <16 x i8> %or1, zeroinitializer
+  %msk1 = bitcast <16 x i1> %msk to i16
+  %not = icmp eq i16 %msk1, 0
+  ret i1 %not
+}
+
 declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone
 declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
 declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone