[PATCH] D60838: [x86] use psubus for more vsetcc lowering (PR39859)
Sanjay Patel via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 23 08:20:50 PDT 2019
This revision was automatically updated to reflect the committed changes.
Closed by commit rL358999: [x86] use psubus for more vsetcc lowering (PR39859) (authored by spatel).
Changed prior to commit:
https://reviews.llvm.org/D60838?vs=195729&id=196255#toc
Repository:
rL LLVM
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D60838/new/
https://reviews.llvm.org/D60838
Files:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vec_setcc-2.ll
Index: llvm/trunk/test/CodeGen/X86/vec_setcc-2.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vec_setcc-2.ll
+++ llvm/trunk/test/CodeGen/X86/vec_setcc-2.ll
@@ -194,8 +194,10 @@
define <8 x i1> @ugt_v8i16_splat(<8 x i16> %x) {
; SSE2-LABEL: ugt_v8i16_splat:
; SSE2: ## %bb.0:
-; SSE2-NEXT: pxor {{.*}}(%rip), %xmm0
-; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [243,243,243,243,243,243,243,243]
+; SSE2-NEXT: psubusw %xmm0, %xmm1
+; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: pcmpeqw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: ugt_v8i16_splat:
@@ -541,9 +543,10 @@
define <8 x i16> @PR39859(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: PR39859:
; SSE2: ## %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; SSE2-NEXT: pxor %xmm0, %xmm2
-; SSE2-NEXT: pcmpgtw {{.*}}(%rip), %xmm2
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [43,43,43,43,43,43,43,43]
+; SSE2-NEXT: psubusw %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pcmpeqw %xmm3, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pandn %xmm0, %xmm2
; SSE2-NEXT: por %xmm1, %xmm2
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -19747,10 +19747,11 @@
return DAG.getSetCC(dl, VT, Op0, Op1, SetCCOpcode);
}
-/// Given a simple buildvector constant, return a new vector constant with each
-/// element decremented. If decrementing would result in underflow or this
-/// is not a simple vector constant, return an empty value.
-static SDValue decrementVectorConstant(SDValue V, SelectionDAG &DAG) {
+/// Given a buildvector constant, return a new vector constant with each element
+/// incremented or decremented. If incrementing or decrementing would result in
+/// unsigned overflow or underflow or this is not a simple vector constant,
+/// return an empty value.
+static SDValue incDecVectorConstant(SDValue V, SelectionDAG &DAG, bool IsInc) {
auto *BV = dyn_cast<BuildVectorSDNode>(V.getNode());
if (!BV)
return SDValue();
@@ -19765,11 +19766,12 @@
if (!Elt || Elt->isOpaque() || Elt->getSimpleValueType(0) != EltVT)
return SDValue();
- // Avoid underflow.
- if (Elt->getAPIntValue().isNullValue())
+ // Avoid overflow/underflow.
+ const APInt &EltC = Elt->getAPIntValue();
+ if ((IsInc && EltC.isMaxValue()) || (!IsInc && EltC.isNullValue()))
return SDValue();
- NewVecC.push_back(DAG.getConstant(Elt->getAPIntValue() - 1, DL, EltVT));
+ NewVecC.push_back(DAG.getConstant(EltC + (IsInc ? 1 : -1), DL, EltVT));
}
return DAG.getBuildVector(VT, DL, NewVecC);
@@ -19801,12 +19803,24 @@
// Only do this pre-AVX since vpcmp* is no longer destructive.
if (Subtarget.hasAVX())
return SDValue();
- SDValue ULEOp1 = decrementVectorConstant(Op1, DAG);
+ SDValue ULEOp1 = incDecVectorConstant(Op1, DAG, false);
if (!ULEOp1)
return SDValue();
Op1 = ULEOp1;
break;
}
+ case ISD::SETUGT: {
+ // If the comparison is against a constant, we can turn this into a setuge.
+ // This is beneficial because materializing a constant 0 for the PCMPEQ is
+ // probably cheaper than XOR+PCMPGT using 2 different vector constants:
+ // cmpgt (xor X, SignMaskC) CmpC --> cmpeq (usubsat (CmpC+1), X), 0
+ SDValue UGEOp1 = incDecVectorConstant(Op1, DAG, true);
+ if (!UGEOp1)
+ return SDValue();
+ Op1 = Op0;
+ Op0 = UGEOp1;
+ break;
+ }
// Psubus is better than flip-sign because it requires no inversion.
case ISD::SETUGE:
std::swap(Op0, Op1);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D60838.196255.patch
Type: text/x-patch
Size: 3825 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190423/f8035222/attachment.bin>
More information about the llvm-commits mailing list