[PATCH] D17181: [X86] Don't turn (c?-v:v) into (c?-v:0) by blindly using PSIGN.

Mon Feb 15 16:29:25 PST 2016

ab updated this revision to Diff 48038.
ab added a comment.

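Simplify further by subtracting the mask: since M is all-ones or zero,
(M & 1) == -M, so (add (xor X, M), (and M, 1)) becomes (sub (xor X, M), M).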


http://reviews.llvm.org/D17181

Files:
  lib/Target/X86/X86ISelLowering.cpp
  test/CodeGen/X86/avx2-logic.ll
  test/CodeGen/X86/vec-sign.ll

Index: test/CodeGen/X86/vec-sign.ll
===================================================================

--- test/CodeGen/X86/vec-sign.ll
+++ test/CodeGen/X86/vec-sign.ll
@@ -3,21 +3,12 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE41
 
 define <4 x i32> @signd(<4 x i32> %a, <4 x i32> %b) nounwind {
-; SSE2-LABEL: signd:
-; SSE2:       # BB#0: # %entry
-; SSE2-NEXT:    psrad $31, %xmm1
-; SSE2-NEXT:    pxor %xmm2, %xmm2
-; SSE2-NEXT:    psubd %xmm0, %xmm2
-; SSE2-NEXT:    pand %xmm1, %xmm2
-; SSE2-NEXT:    pandn %xmm0, %xmm1
-; SSE2-NEXT:    por %xmm1, %xmm2
-; SSE2-NEXT:    movdqa %xmm2, %xmm0
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: signd:
-; SSE41:       # BB#0: # %entry
-; SSE41-NEXT:    psignd %xmm1, %xmm0
-; SSE41-NEXT:    retq
+; ALL-LABEL: signd:
+; ALL:       # BB#0: # %entry
+; ALL-NEXT:    psrad $31, %xmm1
+; ALL-NEXT:    pxor %xmm1, %xmm0
+; ALL-NEXT:    psubd %xmm1, %xmm0
+; ALL-NEXT:    retq
 entry:
   %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
   %sub = sub nsw <4 x i32> zeroinitializer, %a
Index: test/CodeGen/X86/avx2-logic.ll
===================================================================
--- test/CodeGen/X86/avx2-logic.ll
+++ test/CodeGen/X86/avx2-logic.ll
@@ -72,7 +72,9 @@
 define <8 x i32> @signd(<8 x i32> %a, <8 x i32> %b) nounwind {
 ; CHECK-LABEL: signd:
 ; CHECK:       ## BB#0: ## %entry
-; CHECK-NEXT:    vpsignd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpsrad $31, %ymm1, %ymm1
+; CHECK-NEXT:    vpxor %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
 ; CHECK-NEXT:    retq
 entry:
   %b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -26409,8 +26409,7 @@
 
   // look for psign/blend
   if (VT == MVT::v2i64 || VT == MVT::v4i64) {
-    if (!Subtarget.hasSSSE3() ||
-        (VT == MVT::v4i64 && !Subtarget.hasInt256()))
+    if (VT == MVT::v4i64 && !Subtarget.hasInt256())
       return SDValue();
 
     // Canonicalize pandn to RHS
@@ -26459,16 +26458,29 @@
 
       SDLoc DL(N);
 
-      // Now we know we at least have a plendvb with the mask val.  See if
-      // we can form a psignb/w/d.
-      // psign = x.type == y.type == mask.type && y = sub(0, x);
+      // Try to match:
+      //   (or (and M, (sub 0, X)), (pandn M, X))
+      // which is a special case of vselect:
+      //   (vselect M, (sub 0, X), X)
       if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
           ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
           X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
-        assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
-               "Unsupported VT for PSIGN");
-        Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0));
-        return DAG.getBitcast(VT, Mask);
+        assert(EltBits == 8 || EltBits == 16 || EltBits == 32);
+        // Per:
+        // http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate
+        // We know that, if fNegate is 0 or 1:
+        //   (fNegate ? -v : v) == ((v ^ -fNegate) + fNegate)
+        //
+        // Here, we have a mask, M (all 1s or 0), and, similarly, we know that:
+        //   ((M & 1) ? -X : X) == ((X ^ -(M & 1)) + (M & 1))
+        //   ( M      ? -X : X) == ((X ^   M     ) + (M & 1))
+        // This lets us transform our vselect to:
+        //   (add (xor X, M), (and M, 1))
+        // And further to:
+        //   (sub (xor X, M), M)
+        return DAG.getBitcast(
+            VT, DAG.getNode(ISD::SUB, DL, MaskVT,
+                            DAG.getNode(ISD::XOR, DL, MaskVT, X, Mask), Mask));
       }
       // PBLENDVB only available on SSE 4.1
       if (!Subtarget.hasSSE41())


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D17181.48038.patch
Type: text/x-patch
Size: 3942 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160216/e8c9a985/attachment.bin>