[llvm-commits] [llvm] r42433 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h test/CodeGen/X86/memcpy.ll
Evan Cheng
evan.cheng at apple.com
Fri Sep 28 08:36:03 PDT 2007
Hi Rafael,
Are you sure this is better? Did you take any measurements? Our goal
isn't to match gcc output. :)
Perhaps you can add some unit tests to llvm-test?
Thanks,
Evan
On Sep 28, 2007, at 5:53 AM, Rafael Espindola <rafael.espindola at gmail.com> wrote:
> Author: rafael
> Date: Fri Sep 28 07:53:01 2007
> New Revision: 42433
>
> URL: http://llvm.org/viewvc/llvm-project?rev=42433&view=rev
> Log:
> Refactor the memcpy lowering for the x86 target.
>
> The only generated code difference is that now we call memcpy when
> the size of the array is unknown. This matches GCC behavior and is
> better since the run time value can be arbitrarily large.
>
>
> Added:
> llvm/trunk/test/CodeGen/X86/memcpy.ll
> Modified:
> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> llvm/trunk/lib/Target/X86/X86ISelLowering.h
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=42433&r1=42432&r2=42433&view=diff
>
> ===================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Sep 28
> 07:53:01 2007
> @@ -4188,35 +4188,61 @@
> }
>
> SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG
> &DAG) {
> - SDOperand Chain = Op.getOperand(0);
> - unsigned Align =
> - (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
> + SDOperand ChainOp = Op.getOperand(0);
> + SDOperand DestOp = Op.getOperand(1);
> + SDOperand SourceOp = Op.getOperand(2);
> + SDOperand CountOp = Op.getOperand(3);
> + SDOperand AlignOp = Op.getOperand(4);
> + unsigned Align = (unsigned)cast<ConstantSDNode>(AlignOp)->getValue
> ();
> if (Align == 0) Align = 1;
>
> - ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
> - // If not DWORD aligned or size is more than the threshold, call
> memcpy.
> - // The libc version is likely to be faster for these cases. It
> can use the
> - // address value and run time information about the CPU.
> + // The libc version is likely to be faster for the following
> cases. It can
> + // use the address value and run time information about the CPU.
> // With glibc 2.6.1 on a core 2, coping an array of 100M longs was
> 30% faster
> - if ((Align & 3) != 0 ||
> - (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold
> ())) {
> - MVT::ValueType IntPtr = getPointerTy();
> - TargetLowering::ArgListTy Args;
> - TargetLowering::ArgListEntry Entry;
> - Entry.Ty = getTargetData()->getIntPtrType();
> - Entry.Node = Op.getOperand(1); Args.push_back(Entry);
> - Entry.Node = Op.getOperand(2); Args.push_back(Entry);
> - Entry.Node = Op.getOperand(3); Args.push_back(Entry);
> - std::pair<SDOperand,SDOperand> CallResult =
> +
> + // If not DWORD aligned, call memcpy.
> + if ((Align & 3) != 0)
> + return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
> +
> + // If size is unknown, call memcpy.
> + ConstantSDNode *I = dyn_cast<ConstantSDNode>(CountOp);
> + if (!I)
> + return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
> +
> + // If size is more than the threshold, call memcpy.
> + unsigned Size = I->getValue();
> + if (Size > Subtarget->getMinRepStrSizeThreshold())
> + return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
> +
> + return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align,
> DAG);
> +}
> +
> +SDOperand X86TargetLowering::LowerMEMCPYCall(SDOperand Chain,
> + SDOperand Dest,
> + SDOperand Source,
> + SDOperand Count,
> + SelectionDAG &DAG) {
> + MVT::ValueType IntPtr = getPointerTy();
> + TargetLowering::ArgListTy Args;
> + TargetLowering::ArgListEntry Entry;
> + Entry.Ty = getTargetData()->getIntPtrType();
> + Entry.Node = Dest; Args.push_back(Entry);
> + Entry.Node = Source; Args.push_back(Entry);
> + Entry.Node = Count; Args.push_back(Entry);
> + std::pair<SDOperand,SDOperand> CallResult =
> LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C,
> false,
> DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
> - return CallResult.second;
> - }
> + return CallResult.second;
> +}
>
> +SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain,
> + SDOperand Dest,
> + SDOperand Source,
> + unsigned Size,
> + unsigned Align,
> + SelectionDAG &DAG) {
> MVT::ValueType AVT;
> - SDOperand Count;
> unsigned BytesLeft = 0;
> - bool TwoRepMovs = false;
> switch (Align & 3) {
> case 2: // WORD aligned
> AVT = MVT::i16;
> @@ -4228,33 +4254,22 @@
> break;
> default: // Byte aligned
> AVT = MVT::i8;
> - Count = Op.getOperand(3);
> break;
> }
>
> - if (AVT > MVT::i8) {
> - if (I) {
> - unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
> - Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy
> ());
> - BytesLeft = I->getValue() % UBytes;
> - } else {
> - assert(AVT >= MVT::i32 &&
> - "Do not use rep;movs if not at least DWORD aligned");
> - Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
> - Op.getOperand(3), DAG.getConstant(2,
> MVT::i8));
> - TwoRepMovs = true;
> - }
> - }
> + unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
> + SDOperand Count = DAG.getConstant(Size / UBytes, getPointerTy());
> + BytesLeft = Size % UBytes;
>
> SDOperand InFlag(0, 0);
> Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX :
> X86::ECX,
> Count, InFlag);
> InFlag = Chain.getValue(1);
> Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI :
> X86::EDI,
> - Op.getOperand(1), InFlag);
> + Dest, InFlag);
> InFlag = Chain.getValue(1);
> Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI :
> X86::ESI,
> - Op.getOperand(2), InFlag);
> + Source, InFlag);
> InFlag = Chain.getValue(1);
>
> SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
> @@ -4264,27 +4279,12 @@
> Ops.push_back(InFlag);
> Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
>
> - if (TwoRepMovs) {
> - InFlag = Chain.getValue(1);
> - Count = Op.getOperand(3);
> - MVT::ValueType CVT = Count.getValueType();
> - SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
> - DAG.getConstant((AVT == MVT::i64) ?
> 7 : 3, CVT));
> - Chain = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX :
> X86::ECX,
> - Left, InFlag);
> - InFlag = Chain.getValue(1);
> - Tys = DAG.getVTList(MVT::Other, MVT::Flag);
> - Ops.clear();
> - Ops.push_back(Chain);
> - Ops.push_back(DAG.getValueType(MVT::i8));
> - Ops.push_back(InFlag);
> - Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
> - } else if (BytesLeft) {
> + if (BytesLeft) {
> // Issue loads and stores for the last 1 - 7 bytes.
> - unsigned Offset = I->getValue() - BytesLeft;
> - SDOperand DstAddr = Op.getOperand(1);
> + unsigned Offset = Size - BytesLeft;
> + SDOperand DstAddr = Dest;
> MVT::ValueType DstVT = DstAddr.getValueType();
> - SDOperand SrcAddr = Op.getOperand(2);
> + SDOperand SrcAddr = Source;
> MVT::ValueType SrcVT = SrcAddr.getValueType();
> SDOperand Value;
> if (BytesLeft >= 4) {
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=42433&r1=42432&r2=42433&view=diff
>
> ===================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Fri Sep 28 07:53:01
> 2007
> @@ -437,6 +437,12 @@
> SDOperand LowerBRCOND(SDOperand Op, SelectionDAG &DAG);
> SDOperand LowerBRCOND_New(SDOperand Op, SelectionDAG &DAG);
> SDOperand LowerMEMSET(SDOperand Op, SelectionDAG &DAG);
> + SDOperand LowerMEMCPYInline(SDOperand Dest, SDOperand Source,
> + SDOperand Chain, unsigned Size,
> unsigned Align,
> + SelectionDAG &DAG);
> + SDOperand LowerMEMCPYCall(SDOperand ChainOp, SDOperand DestOp,
> + SDOperand SourceOp, SDOperand CountOp,
> + SelectionDAG &DAG);
> SDOperand LowerMEMCPY(SDOperand Op, SelectionDAG &DAG);
> SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG);
> SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG);
>
> Added: llvm/trunk/test/CodeGen/X86/memcpy.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memcpy.ll?rev=42433&view=auto
>
> ===================================================================
> --- llvm/trunk/test/CodeGen/X86/memcpy.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/memcpy.ll Fri Sep 28 07:53:01 2007
> @@ -0,0 +1,17 @@
> +; RUN: llvm-as < %s | llc -march=x86-64 | grep call.*memcpy | count 2
> +
> +declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
> +
> +define i8* @my_memcpy(i8* %a, i8* %b, i64 %n) {
> +entry:
> + tail call void @llvm.memcpy.i64( i8* %a, i8* %b, i64 %n, i32 1 )
> + ret i8* %a
> +}
> +
> +define i8* @my_memcpy2(i64* %a, i64* %b, i64 %n) {
> +entry:
> + %tmp14 = bitcast i64* %a to i8*
> + %tmp25 = bitcast i64* %b to i8*
> + tail call void @llvm.memcpy.i64(i8* %tmp14, i8* %tmp25, i64 %n,
> i32 8 )
> + ret i8* %tmp14
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits mailing list