[llvm] e56edb8 - [X86][SSE] Fold select(X > -1, A, B) -> select(0 > X, B, A) (PR47404)

Thu Sep 3 05:05:22 PDT 2020

Author: Simon Pilgrim
Date: 2020-09-03T13:02:08+01:00
New Revision: e56edb801bbd41925dbaca39162731613fb289f4

URL: https://github.com/llvm/llvm-project/commit/e56edb801bbd41925dbaca39162731613fb289f4
DIFF: https://github.com/llvm/llvm-project/commit/e56edb801bbd41925dbaca39162731613fb289f4.diff

LOG: [X86][SSE] Fold select(X > -1, A, B) -> select(0 > X, B, A) (PR47404)

Help PBLENDVB peek through to the sign bit source of the selection mask by swapping the select condition and inputs.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2af3d743728e..6560ab2bcccd 100644

--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -40710,10 +40710,18 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
     return V;
 
   // select(~Cond, X, Y) -> select(Cond, Y, X)
-  if (CondVT.getScalarType() != MVT::i1)
+  if (CondVT.getScalarType() != MVT::i1) {
     if (SDValue CondNot = IsNOT(Cond, DAG))
       return DAG.getNode(N->getOpcode(), DL, VT,
                          DAG.getBitcast(CondVT, CondNot), RHS, LHS);
+    // pcmpgt(X, -1) -> pcmpgt(0, X) to help select/blendv just use the signbit.
+    if (Cond.getOpcode() == X86ISD::PCMPGT && Cond.hasOneUse() &&
+        ISD::isBuildVectorAllOnes(Cond.getOperand(1).getNode())) {
+      Cond = DAG.getNode(X86ISD::PCMPGT, DL, CondVT,
+                         DAG.getConstant(0, DL, CondVT), Cond.getOperand(0));
+      return DAG.getNode(N->getOpcode(), DL, VT, Cond, RHS, LHS);
+    }
+  }
 
   // Try to optimize vXi1 selects if both operands are either all constants or
   // bitcasts from scalar integer type. In that case we can convert the operands

diff  --git a/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll b/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll
index 26fd3cf7ab29..7039e33c0093 100644
--- a/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll
@@ -261,18 +261,14 @@ define <16 x i8> @PR47404(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
 ; SSE-LABEL: PR47404:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movdqa %xmm0, %xmm3
-; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSE-NEXT:    pcmpgtb %xmm0, %xmm2
-; SSE-NEXT:    movdqa %xmm2, %xmm0
-; SSE-NEXT:    pblendvb %xmm0, %xmm3, %xmm1
-; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    movaps %xmm2, %xmm0
+; SSE-NEXT:    pblendvb %xmm0, %xmm1, %xmm3
+; SSE-NEXT:    movdqa %xmm3, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: PR47404:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX-NEXT:    vpcmpgtb %xmm3, %xmm2, %xmm2
-; AVX-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %4 = icmp sgt <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
   %5 = select <16 x i1> %4, <16 x i8> %0, <16 x i8> %1