[llvm] r324844 - [X86][SSE] Use SplitBinaryOpsAndApply to recognise PSUBUS patterns before they're split on AVX1
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 11 09:29:43 PST 2018
Author: rksimon
Date: Sun Feb 11 09:29:42 2018
New Revision: 324844
URL: http://llvm.org/viewvc/llvm-project?rev=324844&view=rev
Log:
[X86][SSE] Use SplitBinaryOpsAndApply to recognise PSUBUS patterns before they're split on AVX1
This needs to be generalised further to support AVX512BW cases, but I want to add support for non-uniform constants first.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/psubus.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=324844&r1=324843&r2=324844&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Feb 11 09:29:42 2018
@@ -31821,7 +31821,7 @@ static SDValue combineSelect(SDNode *N,
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
// psubus is available in SSE2 and AVX2 for i8 and i16 vectors.
((Subtarget.hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) ||
- (Subtarget.hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) {
+ (Subtarget.hasAVX() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
// Check if one of the arms of the VSELECT is a zero vector. If it's on the
@@ -31839,12 +31839,18 @@ static SDValue combineSelect(SDNode *N,
SDValue OpLHS = Other->getOperand(0), OpRHS = Other->getOperand(1);
SDValue CondRHS = Cond->getOperand(1);
+ auto SUBUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
+ SDValue Op1) {
+ return DAG.getNode(X86ISD::SUBUS, DL, Op0.getValueType(), Op0, Op1);
+ };
+
// Look for a general sub with unsigned saturation first.
// x >= y ? x-y : 0 --> subus x, y
// x > y ? x-y : 0 --> subus x, y
if ((CC == ISD::SETUGE || CC == ISD::SETUGT) &&
Other->getOpcode() == ISD::SUB && DAG.isEqualTo(OpRHS, CondRHS))
- return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS);
+ return SplitBinaryOpsAndApply(DAG, Subtarget, DL, VT, OpLHS, OpRHS,
+ SUBUSBuilder);
if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS))
if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) {
@@ -31856,9 +31862,10 @@ static SDValue combineSelect(SDNode *N,
if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
CondRHSConst->getAPIntValue() ==
(-OpRHSConst->getAPIntValue() - 1))
- return DAG.getNode(
- X86ISD::SUBUS, DL, VT, OpLHS,
- DAG.getConstant(-OpRHSConst->getAPIntValue(), DL, VT));
+ return SplitBinaryOpsAndApply(
+ DAG, Subtarget, DL, VT, OpLHS,
+ DAG.getConstant(-OpRHSConst->getAPIntValue(), DL, VT),
+ SUBUSBuilder);
// Another special case: If C was a sign bit, the sub has been
// canonicalized into a xor.
@@ -31870,9 +31877,10 @@ static SDValue combineSelect(SDNode *N,
OpRHSConst->getAPIntValue().isSignMask())
// Note that we have to rebuild the RHS constant here to ensure we
// don't rely on particular values of undef lanes.
- return DAG.getNode(
- X86ISD::SUBUS, DL, VT, OpLHS,
- DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT));
+ return SplitBinaryOpsAndApply(
+ DAG, Subtarget, DL, VT, OpLHS,
+ DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT),
+ SUBUSBuilder);
}
}
}
Modified: llvm/trunk/test/CodeGen/X86/psubus.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/psubus.ll?rev=324844&r1=324843&r2=324844&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/psubus.ll (original)
+++ llvm/trunk/test/CodeGen/X86/psubus.ll Sun Feb 11 09:29:42 2018
@@ -179,12 +179,10 @@ define <16 x i16> @test7(<16 x i16> %x)
; AVX1-LABEL: test7:
; AVX1: # %bb.0: # %vector.ph
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vpcmpgtw %xmm0, %xmm2, %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
-; AVX1-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
-; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; AVX1-NEXT: vpsubusw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpsubusw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test7:
@@ -214,20 +212,10 @@ define <16 x i16> @test8(<16 x i16> %x)
; AVX1-LABEL: test8:
; AVX1: # %bb.0: # %vector.ph
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [32766,32766,32766,32766,32766,32766,32766,32766]
-; AVX1-NEXT: vpminuw %xmm2, %xmm1, %xmm3
-; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm3
-; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpminuw %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [32769,32769,32769,32769,32769,32769,32769,32769]
-; AVX1-NEXT: vpaddw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpaddw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [32767,32767,32767,32767,32767,32767,32767,32767]
+; AVX1-NEXT: vpsubusw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpsubusw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vandps %ymm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test8:
@@ -262,15 +250,9 @@ define <16 x i16> @test9(<16 x i16> %x,
; AVX1-NEXT: vmovd %edi, %xmm2
; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
-; AVX1-NEXT: vpsubw %xmm2, %xmm1, %xmm3
-; AVX1-NEXT: vpsubw %xmm2, %xmm0, %xmm4
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
-; AVX1-NEXT: vpmaxuw %xmm2, %xmm1, %xmm4
-; AVX1-NEXT: vpcmpeqw %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpmaxuw %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsubusw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpsubusw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vandps %ymm3, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test9:
@@ -305,12 +287,10 @@ define <32 x i8> @test10(<32 x i8> %x) n
; AVX1-LABEL: test10:
; AVX1: # %bb.0: # %vector.ph
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
-; AVX1-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
-; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; AVX1-NEXT: vpsubusb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpsubusb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test10:
@@ -340,20 +320,10 @@ define <32 x i8> @test11(<32 x i8> %x) n
; AVX1-LABEL: test11:
; AVX1: # %bb.0: # %vector.ph
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126]
-; AVX1-NEXT: vpminub %xmm2, %xmm1, %xmm3
-; AVX1-NEXT: vpcmpeqb %xmm3, %xmm1, %xmm3
-; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [129,129,129,129,129,129,129,129,129,129,129,129,129,129,129,129]
-; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX1-NEXT: vpsubusb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpsubusb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vandps %ymm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test11:
@@ -407,15 +377,9 @@ define <32 x i8> @test12(<32 x i8> %x, i
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm3
-; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm4
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
-; AVX1-NEXT: vpmaxub %xmm1, %xmm2, %xmm4
-; AVX1-NEXT: vpcmpeqb %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsubusb %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vandps %ymm3, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test12:
More information about the llvm-commits mailing list