[llvm] d514382 - [X86] combinePTESTCC - attempt to use TESTPS/TESTPD instead of MOVMSKPS/MOVMSKPD for all-of cases with all-sign values.

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 12 07:33:26 PDT 2023


Author: Simon Pilgrim
Date: 2023-04-12T15:33:12+01:00
New Revision: d5143827956a89f7fd05a44b42ed947db4a07e3b

URL: https://github.com/llvm/llvm-project/commit/d5143827956a89f7fd05a44b42ed947db4a07e3b
DIFF: https://github.com/llvm/llvm-project/commit/d5143827956a89f7fd05a44b42ed947db4a07e3b.diff

LOG: [X86] combinePTESTCC - attempt to use TESTPS/TESTPD instead of MOVMSKPS/MOVMSKPD for all-of cases with all-sign values.

We can probably be more aggressive with TESTPS/TESTPD (instead of relying on the SimplifyMultipleUseDemandedBits call) - I've updated an existing TODO to suggest this for now.

Part of Issue #60007

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/combine-ptest.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0a0918139b3d..a621c83075bb 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47351,12 +47351,11 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
                            DAG.getBitcast(OpVT, BC.getOperand(1)));
       }
 
-      // If every element is an all-sign value, see if we can use MOVMSK to
-      // more efficiently extract the sign bits and compare that.
+      // If every element is an all-sign value, see if we can use TESTP/MOVMSK
+      // to more efficiently extract the sign bits and compare that.
       // TODO: Handle TESTC with comparison inversion.
-      // TODO: When can we use TESTPS/TESTPD instead?
       // TODO: Can we remove SimplifyMultipleUseDemandedBits and rely on
-      // MOVMSK combines to make sure its never worse than PTEST?
+      // TESTP/MOVMSK combines to make sure its never worse than PTEST?
       if (BCVT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(BCVT)) {
         unsigned EltBits = BCVT.getScalarSizeInBits();
         if (DAG.ComputeNumSignBits(BC) == EltBits) {
@@ -47368,7 +47367,13 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
             // For vXi16 cases we need to use pmovmksb and extract every other
             // sign bit.
             SDLoc DL(EFLAGS);
-            if (EltBits == 16) {
+            if (EltBits == 32 || EltBits == 64) {
+              MVT FloatSVT = MVT::getFloatingPointVT(EltBits);
+              MVT FloatVT =
+                  MVT::getVectorVT(FloatSVT, OpVT.getSizeInBits() / EltBits);
+              Res = DAG.getBitcast(FloatVT, Res);
+              return DAG.getNode(X86ISD::TESTP, SDLoc(EFLAGS), VT, Res, Res);
+            } else if (EltBits == 16) {
               MVT MovmskVT = BCVT.is128BitVector() ? MVT::v16i8 : MVT::v32i8;
               Res = DAG.getBitcast(MovmskVT, Res);
               Res = getPMOVMSKB(DL, Res, DAG, Subtarget);

diff  --git a/llvm/test/CodeGen/X86/combine-ptest.ll b/llvm/test/CodeGen/X86/combine-ptest.ll
index 61ca9208a100..d86600360d87 100644
--- a/llvm/test/CodeGen/X86/combine-ptest.ll
+++ b/llvm/test/CodeGen/X86/combine-ptest.ll
@@ -318,15 +318,14 @@ start:
 }
 
 ;
-; testz(ashr(X,bw-1),-1) -> movmsk(X)
+; testz(ashr(X,bw-1),-1) -> testpd/testps/pmovmskb(X)
 ;
 
 define i32 @ptestz_v2i64_signbits(<2 x i64> %c, i32 %a, i32 %b) {
 ; CHECK-LABEL: ptestz_v2i64_signbits:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    vmovmskpd %xmm0, %ecx
-; CHECK-NEXT:    testl %ecx, %ecx
+; CHECK-NEXT:    vtestpd %xmm0, %xmm0
 ; CHECK-NEXT:    cmovnel %esi, %eax
 ; CHECK-NEXT:    retq
   %t1 = ashr <2 x i64> %c, <i64 63, i64 63>
@@ -352,8 +351,7 @@ define i32 @ptestz_v8i32_signbits(<8 x i32> %c, i32 %a, i32 %b) {
 ; AVX2-LABEL: ptestz_v8i32_signbits:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    vmovmskps %ymm0, %ecx
-; AVX2-NEXT:    testl %ecx, %ecx
+; AVX2-NEXT:    vtestps %ymm0, %ymm0
 ; AVX2-NEXT:    cmovnel %esi, %eax
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq


        


More information about the llvm-commits mailing list