[llvm-commits] [llvm] r66776 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/i64-mem-copy.ll test/CodeGen/X86/mmx-copy-gprs.ll
Nick Lewycky
nicholas at mxc.ca
Thu Mar 12 09:31:42 PDT 2009
Evan Cheng wrote:
> Author: evancheng
> Date: Thu Mar 12 00:59:15 2009
> New Revision: 66776
>
> URL: http://llvm.org/viewvc/llvm-project?rev=66776&view=rev
> Log:
> On x86, if the only use of a i64 load is a i64 store, generate a pair of double load and store instead.
>
> Added:
> llvm/trunk/test/CodeGen/X86/i64-mem-copy.ll
> Modified:
> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> llvm/trunk/test/CodeGen/X86/mmx-copy-gprs.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=66776&r1=66775&r2=66776&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Mar 12 00:59:15 2009
> @@ -8285,14 +8285,21 @@
>
> /// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
> static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
> - const X86Subtarget *Subtarget) {
> + const X86Subtarget *Subtarget) {
> // Turn load->store of MMX types into GPR load/stores. This avoids clobbering
> // the FP state in cases where an emms may be missing.
> // A preferable solution to the general problem is to figure out the right
> // places to insert EMMS. This qualifies as a quick hack.
> +
> + // Similarly, turn load->store of i64 into double load/stores in 32-bit mode.
> StoreSDNode *St = cast<StoreSDNode>(N);
> - if (St->getValue().getValueType().isVector() &&
> - St->getValue().getValueType().getSizeInBits() == 64 &&
> + MVT VT = St->getValue().getValueType();
> + if (VT.getSizeInBits() != 64)
> + return SDValue();
> +
> + bool F64IsLegal = !UseSoftFloat && !NoImplicitFloat && Subtarget->hasSSE2();
> + if ((VT.isVector() ||
> + (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
> isa<LoadSDNode>(St->getValue()) &&
> !cast<LoadSDNode>(St->getValue())->isVolatile() &&
> St->getChain().hasOneUse() && !St->isVolatile()) {
> @@ -8316,60 +8323,72 @@
> Ops.push_back(ChainVal->getOperand(i));
> }
> }
> - if (Ld) {
> - DebugLoc DL = N->getDebugLoc();
> - // If we are a 64-bit capable x86, lower to a single movq load/store pair.
> - if (Subtarget->is64Bit()) {
> - SDValue NewLd = DAG.getLoad(MVT::i64, DL, Ld->getChain(),
> - Ld->getBasePtr(), Ld->getSrcValue(),
> - Ld->getSrcValueOffset(), Ld->isVolatile(),
> - Ld->getAlignment());
> - SDValue NewChain = NewLd.getValue(1);
> - if (TokenFactorIndex != -1) {
> - Ops.push_back(NewChain);
> - NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Ops[0],
> - Ops.size());
> - }
> - return DAG.getStore(NewChain, DL, NewLd, St->getBasePtr(),
> - St->getSrcValue(), St->getSrcValueOffset(),
> - St->isVolatile(), St->getAlignment());
> - }
> -
> - // Otherwise, lower to two 32-bit copies.
> - SDValue LoAddr = Ld->getBasePtr();
> - SDValue HiAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, LoAddr,
> - DAG.getConstant(4, MVT::i32));
> -
> - SDValue LoLd = DAG.getLoad(MVT::i32, DL, Ld->getChain(), LoAddr,
> - Ld->getSrcValue(), Ld->getSrcValueOffset(),
> - Ld->isVolatile(), Ld->getAlignment());
> - SDValue HiLd = DAG.getLoad(MVT::i32, DL, Ld->getChain(), HiAddr,
> - Ld->getSrcValue(), Ld->getSrcValueOffset()+4,
> - Ld->isVolatile(),
> - MinAlign(Ld->getAlignment(), 4));
>
> - SDValue NewChain = LoLd.getValue(1);
> + if (!Ld || !ISD::isNormalLoad(Ld))
> + return SDValue();
> +
> + // If this is not the MMX case, i.e. we are just turning i64 load/store
> + // into f64 load/store, avoid the transformation if there are multiple
> + // uses of the loaded value.
> + if (!VT.isVector() && !Ld->hasNUsesOfValue(1, 0))
> + return SDValue();
> +
> + DebugLoc LdDL = Ld->getDebugLoc();
> + DebugLoc StDL = N->getDebugLoc();
> + // If we are a 64-bit capable x86, lower to a single movq load/store pair.
> + // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
> + // pair instead.
> + if (Subtarget->is64Bit() || F64IsLegal) {
> + MVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
> + SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(),
> + Ld->getBasePtr(), Ld->getSrcValue(),
> + Ld->getSrcValueOffset(), Ld->isVolatile(),
> + Ld->getAlignment());
> + SDValue NewChain = NewLd.getValue(1);
> if (TokenFactorIndex != -1) {
> - Ops.push_back(LoLd);
> - Ops.push_back(HiLd);
> - NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Ops[0],
> + Ops.push_back(NewChain);
> + NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
> Ops.size());
> }
> -
> - LoAddr = St->getBasePtr();
> - HiAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, LoAddr,
> - DAG.getConstant(4, MVT::i32));
> -
> - SDValue LoSt = DAG.getStore(NewChain, DL, LoLd, LoAddr,
> + return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
> St->getSrcValue(), St->getSrcValueOffset(),
> St->isVolatile(), St->getAlignment());
> - SDValue HiSt = DAG.getStore(NewChain, DL, HiLd, HiAddr,
> - St->getSrcValue(),
> - St->getSrcValueOffset() + 4,
> - St->isVolatile(),
> - MinAlign(St->getAlignment(), 4));
> - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, LoSt, HiSt);
> }
> +
> + // Otherwise, lower to two pairs of 32-bit loads / stores.
> + SDValue LoAddr = Ld->getBasePtr();
> + SDValue HiAddr = DAG.getNode(ISD::ADD, LdDL, MVT::i32, LoAddr,
> + DAG.getConstant(4, MVT::i32));
> +
> + SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr,
> + Ld->getSrcValue(), Ld->getSrcValueOffset(),
> + Ld->isVolatile(), Ld->getAlignment());
> + SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr,
> + Ld->getSrcValue(), Ld->getSrcValueOffset()+4,
> + Ld->isVolatile(),
> + MinAlign(Ld->getAlignment(), 4));
> +
> + SDValue NewChain = LoLd.getValue(1);
> + if (TokenFactorIndex != -1) {
> + Ops.push_back(LoLd);
> + Ops.push_back(HiLd);
> + NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
> + Ops.size());
> + }
> +
> + LoAddr = St->getBasePtr();
> + HiAddr = DAG.getNode(ISD::ADD, StDL, MVT::i32, LoAddr,
> + DAG.getConstant(4, MVT::i32));
> +
> + SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr,
> + St->getSrcValue(), St->getSrcValueOffset(),
> + St->isVolatile(), St->getAlignment());
> + SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr,
> + St->getSrcValue(),
> + St->getSrcValueOffset() + 4,
> + St->isVolatile(),
> + MinAlign(St->getAlignment(), 4));
> + return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt);
> }
> return SDValue();
> }
>
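One note on the combine itself, in case it helps other readers: the non-vector path now refuses to fire unless the loaded i64 has exactly one use (the hasNUsesOfValue(1, 0) check), while the vector/MMX path keeps its old behaviour. A quick sketch of my own (not from the patch, untested) of a case that should therefore be left as plain integer loads/stores:

define i64 @copy_and_return(i64* %x, i64* %y) nounwind {
entry:
  ; the loaded value feeds both the store and the return, so
  ; hasNUsesOfValue(1, 0) is false and the i64 -> f64 rewrite
  ; should not kick in; expect ordinary 32-bit moves here
  %tmp = load i64* %y, align 8          ; <i64> [#uses=2]
  store i64 %tmp, i64* %x, align 8
  ret i64 %tmp
}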
> Added: llvm/trunk/test/CodeGen/X86/i64-mem-copy.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/i64-mem-copy.ll?rev=66776&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/i64-mem-copy.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/i64-mem-copy.ll Thu Mar 12 00:59:15 2009
> @@ -0,0 +1,13 @@
> +; RUN: llvm-as < %s | llc -march=x86-64 | grep {movq.*(%rsi), %rax}
> +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {movsd.*(%eax),}
> +
> +; Uses movsd to load / store i64 values in sse2 is available.
Typo? values if* sse2 is available
Nick
> +
> +; rdar://6659858
> +
> +define void @foo(i64* %x, i64* %y) nounwind {
> +entry:
> + %tmp1 = load i64* %y, align 8 ; <i64> [#uses=1]
> + store i64 %tmp1, i64* %x, align 8
> + ret void
> +}
>
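Similarly, since both the load and the store have to be non-volatile for the combine to apply, a volatile variant of the test above should presumably keep the plain integer moves. Another sketch of my own (not part of the patch), assuming the current .ll syntax for volatile accesses:

define void @copy_volatile(i64* %x, i64* %y) nounwind {
entry:
  ; volatile accesses fail the !isVolatile() checks in
  ; PerformSTORECombine, so no movsd rewrite is expected here
  %tmp = volatile load i64* %y, align 8
  volatile store i64 %tmp, i64* %x, align 8
  ret void
}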
> Modified: llvm/trunk/test/CodeGen/X86/mmx-copy-gprs.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mmx-copy-gprs.ll?rev=66776&r1=66775&r2=66776&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/mmx-copy-gprs.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/mmx-copy-gprs.ll Thu Mar 12 00:59:15 2009
> @@ -1,11 +1,11 @@
> -; RUN: llvm-as < %s | llc -march=x86-64 | grep {movq.*(%rsi), %rax}
> -; RUN: llvm-as < %s | llc -march=x86 | grep {movl.*4(%eax),}
> +; RUN: llvm-as < %s | llc -march=x86-64 | grep {movq.*(%rsi), %rax}
> +; RUN: llvm-as < %s | llc -march=x86 -mattr=-sse2 | grep {movl.*4(%eax),}
> +; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep {movsd.(%eax),}
>
> ; This test should use GPRs to copy the mmx value, not MMX regs. Using mmx regs,
> ; increases the places that need to use emms.
>
> ; rdar://5741668
> -target triple = "x86_64-apple-darwin8"
>
> define void @foo(<1 x i64>* %x, <1 x i64>* %y) nounwind {
> entry:
>
>