[llvm] d514382 - [X86] combinePTESTCC - attempt to use TESTPS/TESTPD instead of MOVMSKPS/MOVMSKPD for all-of cases with all-sign values.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 12 07:33:26 PDT 2023
Author: Simon Pilgrim
Date: 2023-04-12T15:33:12+01:00
New Revision: d5143827956a89f7fd05a44b42ed947db4a07e3b
URL: https://github.com/llvm/llvm-project/commit/d5143827956a89f7fd05a44b42ed947db4a07e3b
DIFF: https://github.com/llvm/llvm-project/commit/d5143827956a89f7fd05a44b42ed947db4a07e3b.diff
LOG: [X86] combinePTESTCC - attempt to use TESTPS/TESTPD instead of MOVMSKPS/MOVMSKPD for all-of cases with all-sign values.
We can probably be more aggressive with TESTPS/TESTPD (instead of relying on the SimplifyMultipleUseDemandedBits call) - I've updated an existing TODO to suggest this for now.
Part of Issue #60007
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/combine-ptest.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0a0918139b3d..a621c83075bb 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47351,12 +47351,11 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
DAG.getBitcast(OpVT, BC.getOperand(1)));
}
- // If every element is an all-sign value, see if we can use MOVMSK to
- // more efficiently extract the sign bits and compare that.
+ // If every element is an all-sign value, see if we can use TESTP/MOVMSK
+ // to more efficiently extract the sign bits and compare that.
// TODO: Handle TESTC with comparison inversion.
- // TODO: When can we use TESTPS/TESTPD instead?
// TODO: Can we remove SimplifyMultipleUseDemandedBits and rely on
- // MOVMSK combines to make sure its never worse than PTEST?
+ // TESTP/MOVMSK combines to make sure its never worse than PTEST?
if (BCVT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(BCVT)) {
unsigned EltBits = BCVT.getScalarSizeInBits();
if (DAG.ComputeNumSignBits(BC) == EltBits) {
@@ -47368,7 +47367,13 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
// For vXi16 cases we need to use pmovmksb and extract every other
// sign bit.
SDLoc DL(EFLAGS);
- if (EltBits == 16) {
+ if (EltBits == 32 || EltBits == 64) {
+ MVT FloatSVT = MVT::getFloatingPointVT(EltBits);
+ MVT FloatVT =
+ MVT::getVectorVT(FloatSVT, OpVT.getSizeInBits() / EltBits);
+ Res = DAG.getBitcast(FloatVT, Res);
+ return DAG.getNode(X86ISD::TESTP, SDLoc(EFLAGS), VT, Res, Res);
+ } else if (EltBits == 16) {
MVT MovmskVT = BCVT.is128BitVector() ? MVT::v16i8 : MVT::v32i8;
Res = DAG.getBitcast(MovmskVT, Res);
Res = getPMOVMSKB(DL, Res, DAG, Subtarget);
diff --git a/llvm/test/CodeGen/X86/combine-ptest.ll b/llvm/test/CodeGen/X86/combine-ptest.ll
index 61ca9208a100..d86600360d87 100644
--- a/llvm/test/CodeGen/X86/combine-ptest.ll
+++ b/llvm/test/CodeGen/X86/combine-ptest.ll
@@ -318,15 +318,14 @@ start:
}
;
-; testz(ashr(X,bw-1),-1) -> movmsk(X)
+; testz(ashr(X,bw-1),-1) -> testpd/testps/pmovmskb(X)
;
define i32 @ptestz_v2i64_signbits(<2 x i64> %c, i32 %a, i32 %b) {
; CHECK-LABEL: ptestz_v2i64_signbits:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vmovmskpd %xmm0, %ecx
-; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: vtestpd %xmm0, %xmm0
; CHECK-NEXT: cmovnel %esi, %eax
; CHECK-NEXT: retq
%t1 = ashr <2 x i64> %c, <i64 63, i64 63>
@@ -352,8 +351,7 @@ define i32 @ptestz_v8i32_signbits(<8 x i32> %c, i32 %a, i32 %b) {
; AVX2-LABEL: ptestz_v8i32_signbits:
; AVX2: # %bb.0:
; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: vmovmskps %ymm0, %ecx
-; AVX2-NEXT: testl %ecx, %ecx
+; AVX2-NEXT: vtestps %ymm0, %ymm0
; AVX2-NEXT: cmovnel %esi, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
More information about the llvm-commits mailing list