[llvm] r326030 - [X86][SSE] combineSubToSubus - begun generalizing to work with any type sizes with SplitBinaryOpsAndApply
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 24 04:44:12 PST 2018
Author: rksimon
Date: Sat Feb 24 04:44:12 2018
New Revision: 326030
URL: http://llvm.org/viewvc/llvm-project?rev=326030&view=rev
Log:
[X86][SSE] combineSubToSubus - begun generalizing to work with any type sizes with SplitBinaryOpsAndApply
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/psubus.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=326030&r1=326029&r2=326030&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Feb 24 04:44:12 2018
@@ -37639,7 +37639,7 @@ static SDValue combineSubToSubus(SDNode
// for v8i32 requires umin, which appears in SSE41.
if (!(Subtarget.hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) &&
!(Subtarget.hasSSE41() && (VT == MVT::v8i32)) &&
- !(Subtarget.hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)) &&
+ !(Subtarget.hasAVX() && (VT == MVT::v32i8 || VT == MVT::v16i16)) &&
!(Subtarget.useBWIRegs() && (VT == MVT::v64i8 || VT == MVT::v32i16 ||
VT == MVT::v16i32 || VT == MVT::v8i64)))
return SDValue();
@@ -37671,10 +37671,16 @@ static SDValue combineSubToSubus(SDNode
} else
return SDValue();
+ auto SUBUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
+ SDValue Op1) {
+ return DAG.getNode(X86ISD::SUBUS, DL, Op0.getValueType(), Op0, Op1);
+ };
+
// PSUBUS doesn't support v8i32/v8i64/v16i32, but it can be enabled with
// special preprocessing in some cases.
if (VT != MVT::v8i32 && VT != MVT::v16i32 && VT != MVT::v8i64)
- return DAG.getNode(X86ISD::SUBUS, SDLoc(N), VT, SubusLHS, SubusRHS);
+ return SplitBinaryOpsAndApply(DAG, Subtarget, SDLoc(N), VT, SubusLHS,
+ SubusRHS, SUBUSBuilder);
// Special preprocessing case can be only applied
// if the value was zero extended from 16 bit,
@@ -37704,8 +37710,9 @@ static SDValue combineSubToSubus(SDNode
SDValue NewSubusLHS =
DAG.getZExtOrTrunc(SubusLHS, SDLoc(SubusLHS), ShrinkedType);
SDValue NewSubusRHS = DAG.getZExtOrTrunc(UMin, SDLoc(SubusRHS), ShrinkedType);
- SDValue Psubus = DAG.getNode(X86ISD::SUBUS, SDLoc(N), ShrinkedType,
- NewSubusLHS, NewSubusRHS);
+ SDValue Psubus =
+ SplitBinaryOpsAndApply(DAG, Subtarget, SDLoc(N), ShrinkedType,
+ NewSubusLHS, NewSubusRHS, SUBUSBuilder);
// Zero extend the result, it may be used somewhere as 32 bit,
// if not zext and following trunc will shrink.
return DAG.getZExtOrTrunc(Psubus, SDLoc(N), ExtType);
Modified: llvm/trunk/test/CodeGen/X86/psubus.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/psubus.ll?rev=326030&r1=326029&r2=326030&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/psubus.ll (original)
+++ llvm/trunk/test/CodeGen/X86/psubus.ll Sat Feb 24 04:44:12 2018
@@ -1202,16 +1202,16 @@ define <32 x i16> @psubus_32i16_max(<32
;
; AVX1-LABEL: psubus_32i16_max:
; AVX1: # %bb.0: # %vector.ph
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT: vpsubusw %xmm4, %xmm5, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT: vpsubusw %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vpsubusw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpsubusw %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpsubusw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: psubus_32i16_max:
@@ -1242,16 +1242,16 @@ define <64 x i8> @psubus_64i8_max(<64 x
;
; AVX1-LABEL: psubus_64i8_max:
; AVX1: # %bb.0: # %vector.ph
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
; AVX1-NEXT: vpsubusb %xmm4, %xmm5, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT: vpsubusb %xmm5, %xmm6, %xmm5
; AVX1-NEXT: vpsubusb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpsubusb %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpsubusb %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: psubus_64i8_max:
More information about the llvm-commits
mailing list