[llvm] r207314 - X86: Custom lower v4i32 UMUL_LOHI into 2 pmuludqs.
Benjamin Kramer
benny.kra at googlemail.com
Sat Apr 26 05:06:11 PDT 2014
Author: d0k
Date: Sat Apr 26 07:06:11 2014
New Revision: 207314
URL: http://llvm.org/viewvc/llvm-project?rev=207314&view=rev
Log:
X86: Custom lower v4i32 UMUL_LOHI into 2 pmuludqs.
Test will follow soon.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=207314&r1=207313&r2=207314&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Apr 26 07:06:11 2014
@@ -826,7 +826,9 @@ void X86TargetLowering::resetOperationAc
setOperationAction(ISD::FRINT, VT, Expand);
setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
@@ -938,6 +940,7 @@ void X86TargetLowering::resetOperationAc
setOperationAction(ISD::ADD, MVT::v2i64, Legal);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
setOperationAction(ISD::SUB, MVT::v16i8, Legal);
setOperationAction(ISD::SUB, MVT::v8i16, Legal);
setOperationAction(ISD::SUB, MVT::v4i32, Legal);
@@ -1226,6 +1229,8 @@ void X86TargetLowering::resetOperationAc
setOperationAction(ISD::MUL, MVT::v16i16, Legal);
// Don't lower v32i8 because there is no 128-bit byte mul
+ setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
+
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
setOperationAction(ISD::SDIV, MVT::v8i32, Custom);
@@ -13157,6 +13162,37 @@ static SDValue LowerMUL(SDValue Op, cons
return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
}
+/// Custom-lower a vector UMUL_LOHI (v4i32 with SSE2, v8i32 with AVX2) into two
+/// PMULUDQ nodes followed by shuffles.
+///
+/// PMULUDQ multiplies only the even 32-bit elements of its operands and
+/// produces the full 64-bit products, e.g. for v4i32:
+///   PMULUDQ <a0|a1|a2|a3>, <b0|b1|b2|b3>  ==>  <2 x i64> <a0*b0|a2*b2>
+/// So one PMULUDQ is issued on the operands as-is (even elements) and a second
+/// one on copies whose odd elements have been moved into the even positions.
+/// The two results are then bitcast back to VT and re-interleaved into a
+/// vector of low halves and a vector of high halves.
+///
+/// \param Op        The UMUL_LOHI node; operands 0 and 1 are the multiplicands.
+/// \param Subtarget Only used to assert that the required ISA is available.
+/// \param DAG       The DAG in which the replacement nodes are created.
+/// \returns A MERGE_VALUES node whose result 0 is the low halves and whose
+///          result 1 is the high halves of the element-wise products.
+static SDValue LowerUMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
+                              SelectionDAG &DAG) {
+  SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
+  EVT VT = Op0.getValueType();
+  SDLoc dl(Op);
+
+  assert(((VT == MVT::v4i32 && Subtarget->hasSSE2()) ||
+          (VT == MVT::v8i32 && Subtarget->hasInt256())) &&
+         "Unexpected type or subtarget for UMUL_LOHI lowering");
+
+  // Move the odd elements into the even positions. PMULUDQ never reads the
+  // odd positions, so leave them undef (-1) instead of over-constraining the
+  // shuffle. Only the first getVectorNumElements() entries are read.
+  const int OddToEvenMask[] = {1, -1, 3, -1, 5, -1, 7, -1};
+  SDValue Odd0 = DAG.getVectorShuffle(VT, dl, Op0, Op0, OddToEvenMask);
+  SDValue Odd1 = DAG.getVectorShuffle(VT, dl, Op1, Op1, OddToEvenMask);
+
+  // Emit two multiplies, one for the even elements and one for the odd
+  // elements. Viewed as VT again, each result is a sequence of <lo, hi> pairs:
+  //   MulEven = <lo0|hi0|lo2|hi2|...>,  MulOdd = <lo1|hi1|lo3|hi3|...>
+  MVT MulVT = VT == MVT::v4i32 ? MVT::v2i64 : MVT::v4i64;
+  SDValue MulEven =
+      DAG.getNode(ISD::BITCAST, dl, VT,
+                  DAG.getNode(X86ISD::PMULUDQ, dl, MulVT, Op0, Op1));
+  SDValue MulOdd =
+      DAG.getNode(ISD::BITCAST, dl, VT,
+                  DAG.getNode(X86ISD::PMULUDQ, dl, MulVT, Odd0, Odd1));
+
+  // Shuffle the halves back into element order. Shuffle indices below
+  // NumElts select from MulEven, the others from MulOdd, so the masks depend
+  // on the element count and must alternate between the two inputs.
+  SDValue Highs, Lows;
+  if (VT == MVT::v8i32) {
+    const int HighMask[] = {1, 9, 3, 11, 5, 13, 7, 15};
+    const int LowMask[] = {0, 8, 2, 10, 4, 12, 6, 14};
+    Highs = DAG.getVectorShuffle(VT, dl, MulEven, MulOdd, HighMask);
+    Lows = DAG.getVectorShuffle(VT, dl, MulEven, MulOdd, LowMask);
+  } else {
+    const int HighMask[] = {1, 5, 3, 7};
+    const int LowMask[] = {0, 4, 2, 6};
+    Highs = DAG.getVectorShuffle(VT, dl, MulEven, MulOdd, HighMask);
+    Lows = DAG.getVectorShuffle(VT, dl, MulEven, MulOdd, LowMask);
+  }
+
+  // UMUL_LOHI produces two values of type VT: the low half first, then the
+  // high half. The merge node needs a matching two-entry type list.
+  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Lows,
+                     Highs);
+}
+
+
static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
MVT EltTy = VT.getVectorElementType();
@@ -14201,6 +14237,7 @@ SDValue X86TargetLowering::LowerOperatio
case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ_ZERO_UNDEF(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op, DAG);
case ISD::MUL: return LowerMUL(Op, Subtarget, DAG);
+ case ISD::UMUL_LOHI: return LowerUMUL_LOHI(Op, Subtarget, DAG);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL: return LowerShift(Op, Subtarget, DAG);
More information about the llvm-commits
mailing list