[PATCH] D17181: [X86] Don't turn (c?-v:v) into (c?-v:0) by blindly using PSIGN.
Ahmed Bougacha via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 15 16:29:25 PST 2016
ab updated this revision to Diff 48038.
ab added a comment.
Simplify further by subtracting the mask.
http://reviews.llvm.org/D17181
Files:
lib/Target/X86/X86ISelLowering.cpp
test/CodeGen/X86/avx2-logic.ll
test/CodeGen/X86/vec-sign.ll
Index: test/CodeGen/X86/vec-sign.ll
===================================================================
--- test/CodeGen/X86/vec-sign.ll
+++ test/CodeGen/X86/vec-sign.ll
@@ -3,21 +3,12 @@
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE41
define <4 x i32> @signd(<4 x i32> %a, <4 x i32> %b) nounwind {
-; SSE2-LABEL: signd:
-; SSE2: # BB#0: # %entry
-; SSE2-NEXT: psrad $31, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: psubd %xmm0, %xmm2
-; SSE2-NEXT: pand %xmm1, %xmm2
-; SSE2-NEXT: pandn %xmm0, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm2
-; SSE2-NEXT: movdqa %xmm2, %xmm0
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: signd:
-; SSE41: # BB#0: # %entry
-; SSE41-NEXT: psignd %xmm1, %xmm0
-; SSE41-NEXT: retq
+; ALL-LABEL: signd:
+; ALL: # BB#0: # %entry
+; ALL-NEXT: psrad $31, %xmm1
+; ALL-NEXT: pxor %xmm1, %xmm0
+; ALL-NEXT: psubd %xmm1, %xmm0
+; ALL-NEXT: retq
entry:
%b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
%sub = sub nsw <4 x i32> zeroinitializer, %a
Index: test/CodeGen/X86/avx2-logic.ll
===================================================================
--- test/CodeGen/X86/avx2-logic.ll
+++ test/CodeGen/X86/avx2-logic.ll
@@ -72,7 +72,9 @@
define <8 x i32> @signd(<8 x i32> %a, <8 x i32> %b) nounwind {
; CHECK-LABEL: signd:
; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpsignd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpsrad $31, %ymm1, %ymm1
+; CHECK-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpsubd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
entry:
%b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -26409,8 +26409,7 @@
// look for psign/blend
if (VT == MVT::v2i64 || VT == MVT::v4i64) {
- if (!Subtarget.hasSSSE3() ||
- (VT == MVT::v4i64 && !Subtarget.hasInt256()))
+ if (VT == MVT::v4i64 && !Subtarget.hasInt256())
return SDValue();
// Canonicalize pandn to RHS
@@ -26459,16 +26458,29 @@
SDLoc DL(N);
- // Now we know we at least have a plendvb with the mask val. See if
- // we can form a psignb/w/d.
- // psign = x.type == y.type == mask.type && y = sub(0, x);
+ // Try to match:
+ // (or (and M, (sub 0, X)), (pandn M, X))
+ // which is a special case of vselect:
+ // (vselect M, (sub 0, X), X)
if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
- assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
- "Unsupported VT for PSIGN");
- Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0));
- return DAG.getBitcast(VT, Mask);
+ assert(EltBits == 8 || EltBits == 16 || EltBits == 32);
+ // Per:
+ // http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate
+ // We know that, if fNegate is 0 or 1:
+ // (fNegate ? -v : v) == ((v ^ -fNegate) + fNegate)
+ //
+ // Here, we have a mask, M (all 1s or 0), and, similarly, we know that:
+ // ((M & 1) ? -X : X) == ((X ^ -(M & 1)) + (M & 1))
+ // ( M ? -X : X) == ((X ^ M ) + (M & 1))
+ // This lets us transform our vselect to:
+ // (add (xor X, M), (and M, 1))
+ // And, since (and M, 1) == (sub 0, M) when M is 0 or all 1s, further to:
+ // (sub (xor X, M), M)
+ return DAG.getBitcast(
+ VT, DAG.getNode(ISD::SUB, DL, MaskVT,
+ DAG.getNode(ISD::XOR, DL, MaskVT, X, Mask), Mask));
}
// PBLENDVB only available on SSE 4.1
if (!Subtarget.hasSSE41())
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D17181.48038.patch
Type: text/x-patch
Size: 3942 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160216/e8c9a985/attachment.bin>
More information about the llvm-commits
mailing list