[llvm-commits] [llvm] r112694 - in /llvm/trunk/lib/Target/X86: X86ISelLowering.cpp X86ISelLowering.h X86InstrFragmentsSIMD.td X86InstrSSE.td
Bruno Cardoso Lopes
bruno.cardoso at gmail.com
Tue Aug 31 22:08:25 PDT 2010
Author: bruno
Date: Wed Sep 1 00:08:25 2010
New Revision: 112694
URL: http://llvm.org/viewvc/llvm-project?rev=112694&view=rev
Log:
Use movlps, movlpd, movss and movsd specific nodes instead of pattern matching with the movlp pattern fragment
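For readers skimming the patch: each of these shuffles now gets its own target-specific DAG node with fixed lane semantics, rather than being recognized through the generic movlp pattern fragment at isel time. The short standalone C++ sketch below (illustrative only, not part of the patch; the helper names are mine) models which lanes each node takes from which operand, following the SSE convention result = node(V1, V2), where V2 is the operand that may come from memory:

// Standalone model of the lane selection performed by the nodes this patch
// touches; not LLVM code, just the masks written out by hand.
#include <array>
#include <cstdio>

using V4F = std::array<float, 4>;
using V2D = std::array<double, 2>;

// MOVLPS: low two f32 lanes from V2, high two lanes from V1.
static V4F movlps(const V4F &v1, const V4F &v2) {
  return {v2[0], v2[1], v1[2], v1[3]};
}
// MOVSS: lane 0 from V2, lanes 1-3 from V1.
static V4F movss(const V4F &v1, const V4F &v2) {
  return {v2[0], v1[1], v1[2], v1[3]};
}
// MOVLPD / MOVSD: low f64 lane from V2, high lane from V1.
static V2D movlpd(const V2D &v1, const V2D &v2) {
  return {v2[0], v1[1]};
}

int main() {
  V4F a = {0, 1, 2, 3}, b = {10, 11, 12, 13};
  V4F l = movlps(a, b), s = movss(a, b);
  V2D c = {0, 1}, d = {10, 11};
  V2D m = movlpd(c, d);
  std::printf("movlps: %g %g %g %g\n", l[0], l[1], l[2], l[3]);
  std::printf("movss : %g %g %g %g\n", s[0], s[1], s[2], s[3]);
  std::printf("movlpd: %g %g\n", m[0], m[1]);
  return 0;
}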
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=112694&r1=112693&r2=112694&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Sep 1 00:08:25 2010
@@ -2593,7 +2593,10 @@
case X86ISD::SHUFPD:
case X86ISD::SHUFPS:
case X86ISD::MOVLHPS:
+ case X86ISD::MOVLHPD:
case X86ISD::MOVHLPS:
+ case X86ISD::MOVLPS:
+ case X86ISD::MOVLPD:
case X86ISD::MOVSHDUP:
case X86ISD::MOVSLDUP:
case X86ISD::MOVSS:
@@ -2648,6 +2651,8 @@
case X86ISD::MOVLHPS:
case X86ISD::MOVLHPD:
case X86ISD::MOVHLPS:
+ case X86ISD::MOVLPS:
+ case X86ISD::MOVLPD:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
case X86ISD::PUNPCKLDQ:
@@ -3664,7 +3669,6 @@
SDValue V = SDValue(N, 0);
EVT VT = V.getValueType();
unsigned Opcode = V.getOpcode();
- int NumElems = VT.getVectorNumElements();
// Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
@@ -3673,6 +3677,7 @@
if (Index < 0)
return DAG.getUNDEF(VT.getVectorElementType());
+ int NumElems = VT.getVectorNumElements();
SDValue NewV = (Index < NumElems) ? SV->getOperand(0) : SV->getOperand(1);
return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG);
}
@@ -3698,8 +3703,9 @@
if (Opcode == ISD::BIT_CONVERT) {
V = V.getOperand(0);
EVT SrcVT = V.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
- if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != (unsigned)NumElems)
+ if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
return SDValue();
}
@@ -5061,6 +5067,67 @@
return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, V1, V2, DAG);
}
+static
+SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+
+ // Use MOVLPS and MOVLPD when V1 or V2 is a load. During isel, the second
+ // operand of these instructions can only be memory, so check whether there
+ // is a potential load to fold here; otherwise use SHUFPS or MOVSD to match
+ // the same masks.
+ bool CanFoldLoad = false;
+ SDValue TmpV1 = V1;
+ SDValue TmpV2 = V2;
+
+ // Trivial case, when V2 is a load.
+ if (TmpV2.getOpcode() == ISD::BIT_CONVERT)
+ TmpV2 = TmpV2.getOperand(0);
+ if (TmpV2.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ TmpV2 = TmpV2.getOperand(0);
+ if (MayFoldLoad(TmpV2))
+ CanFoldLoad = true;
+
+ // When V1 is a load, it can be folded later into a store during isel, e.g.:
+ // (store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)), addr:$src1)
+ // turns into:
+ // (MOVLPSmr addr:$src1, VR128:$src2)
+ // So recognize this case here and use MOVLPS or MOVLPD as well.
+ if (TmpV1.getOpcode() == ISD::BIT_CONVERT)
+ TmpV1 = TmpV1.getOperand(0);
+ if (MayFoldLoad(TmpV1))
+ CanFoldLoad = true;
+
+ if (CanFoldLoad) {
+ if (HasSSE2 && NumElems == 2)
+ return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
+
+ if (NumElems == 4)
+ return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG);
+ }
+
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ // Both movl and movlp match v2i64, but v2i64 is never matched by movl
+ // earlier because that check is kept strict to avoid interfering with the
+ // movlp load folding logic (see the code above the getMOVLP call). Match it
+ // here instead; this is ugly, but it will stay this way until all shuffle
+ // matching is moved to x86-specific nodes. Note that for the first condition
+ // below, all types are matched with movsd.
+ if ((HasSSE2 && NumElems == 2) || !X86::isMOVLMask(SVOp))
+ return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
+ else if (HasSSE2)
+ return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
+
+ assert(VT != MVT::v4i32 && "unsupported shuffle type");
+
+ // Invert the operand order and use SHUFPS to match it.
+ return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V2, V1,
+ X86::getShuffleSHUFImmediate(SVOp), DAG);
+}
+
SDValue
X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
@@ -5182,7 +5249,7 @@
return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG);
if (X86::isMOVLPMask(SVOp))
- return Op;
+ return getMOVLP(Op, dl, DAG, HasSSE2);
}
if (ShouldXformToMOVHLPS(SVOp) ||
@@ -8433,6 +8500,8 @@
case X86ISD::MOVLHPD: return "X86ISD::MOVLHPD";
case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS";
case X86ISD::MOVHLPD: return "X86ISD::MOVHLPD";
+ case X86ISD::MOVLPS: return "X86ISD::MOVLPS";
+ case X86ISD::MOVLPD: return "X86ISD::MOVLPD";
case X86ISD::MOVDDUP: return "X86ISD::MOVDDUP";
case X86ISD::MOVSHDUP: return "X86ISD::MOVSHDUP";
case X86ISD::MOVSLDUP: return "X86ISD::MOVSLDUP";
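The comments inside the new getMOVLP helper above describe when a load can be folded and which node is chosen. As a plain restatement, here is a standalone C++ sketch of that decision (illustrative only; pickMovlpNode and its boolean parameters are my names, and the real code derives CanFoldLoad and the mask checks from the DAG):

#include <cstdio>

enum Node { MOVLPS, MOVLPD, MOVSS, MOVSD, SHUFPS };

// Mirrors the node choice in getMOVLP for a shuffle matching a MOVLP mask:
// prefer MOVLPS/MOVLPD when an operand is a foldable load, otherwise fall
// back to MOVSD/MOVSS or an operand-swapped SHUFPS.
static Node pickMovlpNode(bool hasSSE2, unsigned numElems, bool canFoldLoad,
                          bool isMOVLMask) {
  if (canFoldLoad) {
    if (hasSSE2 && numElems == 2) return MOVLPD;  // v2f64 / v2i64
    if (numElems == 4)            return MOVLPS;  // v4f32 / v4i32
  }
  if ((hasSSE2 && numElems == 2) || !isMOVLMask) return MOVSD;
  if (hasSSE2)                                   return MOVSS;
  return SHUFPS;  // emitted with the operands swapped
}

int main() {
  std::printf("v4f32 with foldable load -> %d (MOVLPS)\n",
              (int)pickMovlpNode(true, 4, true, true));
  std::printf("v2f64 with no load       -> %d (MOVSD)\n",
              (int)pickMovlpNode(true, 2, false, true));
  return 0;
}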
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=112694&r1=112693&r2=112694&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Wed Sep 1 00:08:25 2010
@@ -269,6 +269,8 @@
MOVLHPD,
MOVHLPS,
MOVHLPD,
+ MOVLPS,
+ MOVLPD,
MOVSD,
MOVSS,
UNPCKLPS,
Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=112694&r1=112693&r2=112694&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Wed Sep 1 00:08:25 2010
@@ -134,9 +134,6 @@
def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<3>]>;
-def SDTShuff2OpLd : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisPtrTy<2>]>;
-
def SDTShuff2OpLdI : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisPtrTy<1>,
SDTCisInt<2>]>;
@@ -161,14 +158,11 @@
def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2Op>;
def X86Movlhpd : SDNode<"X86ISD::MOVLHPD", SDTShuff2Op>;
-
def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>;
def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>;
-def X86MovlpsLd : SDNode<"X86ISD::MOVLPS", SDTShuff2OpLd,
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-def X86MovlpdLd : SDNode<"X86ISD::MOVLPD", SDTShuff2OpLd,
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
+def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=112694&r1=112693&r2=112694&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed Sep 1 00:08:25 2010
@@ -5840,9 +5840,9 @@
(MOVSDrr (v2f64 VR128:$src1),
(EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>;
def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>;
// Shuffle with MOVSHDUP
def : Pat<(v4i32 (X86Movshdup VR128:$src)),
@@ -5901,7 +5901,25 @@
def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-// Extra patterns to match stores
+// Shuffle with MOVLPS
+def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(X86Movlps VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+
+// Shuffle with MOVLPD
+def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2f64 (X86Movlpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+
+// Extra patterns to match stores with MOVHPS/PD and MOVLPS/PD
def : Pat<(store (f64 (vector_extract
(v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))),addr:$dst),
(MOVHPSmr addr:$dst, VR128:$src)>;
@@ -5909,3 +5927,13 @@
(v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst),
(MOVHPDmr addr:$dst, VR128:$src)>;
+def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+def : Pat<(store (v4i32 (X86Movlps
+ (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+
+def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
+def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
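The new MOVLPSmr/MOVLPDmr store patterns above rest on a simple equivalence: loading a vector, replacing its low half with the low half of a register, and storing the result back to the same address is just a 64-bit store of that register's low half. A small standalone C++ check of that equivalence (again illustrative, not LLVM code):

#include <cassert>
#include <cstring>

int main() {
  float mem[4]     = {0, 1, 2, 3};     // contents at addr:$src1
  const float r[4] = {10, 11, 12, 13}; // VR128:$src2

  // Unfolded form: load the vector, merge the low two lanes from r
  // (the X86Movlps step), and store the result back to the same address.
  float v[4];
  std::memcpy(v, mem, sizeof v);
  v[0] = r[0];
  v[1] = r[1];
  std::memcpy(mem, v, sizeof mem);

  // Folded form (MOVLPSmr): store only the low 64 bits of r.
  float folded[4] = {0, 1, 2, 3};
  std::memcpy(folded, r, 2 * sizeof(float));

  assert(std::memcmp(mem, folded, sizeof mem) == 0);
  return 0;
}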