[llvm-commits] [llvm] r42433 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h test/CodeGen/X86/memcpy.ll

Evan Cheng evan.cheng at apple.com
Fri Sep 28 08:36:03 PDT 2007


Hi Rafael,

Are you sure this is better? Did you do any measurement? Our goal  
isn't to match gcc output. :)

Perhaps you can add some unit tests to llvm-test?

Thanks,

Evan

On Sep 28, 2007, at 5:53 AM, Rafael Espindola <rafael.espindola at gmail.com 
 > wrote:

> Author: rafael
> Date: Fri Sep 28 07:53:01 2007
> New Revision: 42433
>
> URL: http://llvm.org/viewvc/llvm-project?rev=42433&view=rev
> Log:
> Refactor the memcpy lowering for the x86 target.
>
> The only generated code difference is that now we call memcpy when
> the size of the array is unknown. This matches GCC behavior and is
> better since the run time value can be arbitrarily large.
>
>
> Added:
>    llvm/trunk/test/CodeGen/X86/memcpy.ll
> Modified:
>    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>    llvm/trunk/lib/Target/X86/X86ISelLowering.h
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=42433&r1=42432&r2=42433&view=diff
>
> === 
> === 
> === 
> =====================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Sep 28  
> 07:53:01 2007
> @@ -4188,35 +4188,61 @@
> }
>
> SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG  
> &DAG) {
> -  SDOperand Chain = Op.getOperand(0);
> -  unsigned Align =
> -    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
> +  SDOperand ChainOp = Op.getOperand(0);
> +  SDOperand DestOp = Op.getOperand(1);
> +  SDOperand SourceOp = Op.getOperand(2);
> +  SDOperand CountOp = Op.getOperand(3);
> +  SDOperand AlignOp = Op.getOperand(4);
> +  unsigned Align = (unsigned)cast<ConstantSDNode>(AlignOp)->getValue 
> ();
>   if (Align == 0) Align = 1;
>
> -  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
> -  // If not DWORD aligned or size is more than the threshold, call  
> memcpy.
> -  // The libc version is likely to be faster for these cases. It  
> can use the
> -  // address value and run time information about the CPU.
> +  // The libc version is likely to be faster for the following  
> cases. It can
> +  // use the address value and run time information about the CPU.
>   // With glibc 2.6.1 on a core 2, coping an array of 100M longs was  
> 30% faster
> -  if ((Align & 3) != 0 ||
> -      (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold 
> ())) {
> -    MVT::ValueType IntPtr = getPointerTy();
> -    TargetLowering::ArgListTy Args;
> -    TargetLowering::ArgListEntry Entry;
> -    Entry.Ty = getTargetData()->getIntPtrType();
> -    Entry.Node = Op.getOperand(1); Args.push_back(Entry);
> -    Entry.Node = Op.getOperand(2); Args.push_back(Entry);
> -    Entry.Node = Op.getOperand(3); Args.push_back(Entry);
> -    std::pair<SDOperand,SDOperand> CallResult =
> +
> +  // If not DWORD aligned, call memcpy.
> +  if ((Align & 3) != 0)
> +    return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
> +
> +  // If size is unknown, call memcpy.
> +  ConstantSDNode *I = dyn_cast<ConstantSDNode>(CountOp);
> +  if (!I)
> +    return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
> +
> +  // If size is more than the threshold, call memcpy.
> +  unsigned Size = I->getValue();
> +  if (Size > Subtarget->getMinRepStrSizeThreshold())
> +    return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
> +
> +  return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align,  
> DAG);
> +}
> +
> +SDOperand X86TargetLowering::LowerMEMCPYCall(SDOperand Chain,
> +                                             SDOperand Dest,
> +                                             SDOperand Source,
> +                                             SDOperand Count,
> +                                             SelectionDAG &DAG) {
> +  MVT::ValueType IntPtr = getPointerTy();
> +  TargetLowering::ArgListTy Args;
> +  TargetLowering::ArgListEntry Entry;
> +  Entry.Ty = getTargetData()->getIntPtrType();
> +  Entry.Node = Dest; Args.push_back(Entry);
> +  Entry.Node = Source; Args.push_back(Entry);
> +  Entry.Node = Count; Args.push_back(Entry);
> +  std::pair<SDOperand,SDOperand> CallResult =
>       LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C,  
> false,
>                   DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
> -    return CallResult.second;
> -  }
> +  return CallResult.second;
> +}
>
> +SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain,
> +                                               SDOperand Dest,
> +                                               SDOperand Source,
> +                                               unsigned Size,
> +                                               unsigned Align,
> +                                               SelectionDAG &DAG) {
>   MVT::ValueType AVT;
> -  SDOperand Count;
>   unsigned BytesLeft = 0;
> -  bool TwoRepMovs = false;
>   switch (Align & 3) {
>     case 2:   // WORD aligned
>       AVT = MVT::i16;
> @@ -4228,33 +4254,22 @@
>       break;
>     default:  // Byte aligned
>       AVT = MVT::i8;
> -      Count = Op.getOperand(3);
>       break;
>   }
>
> -  if (AVT > MVT::i8) {
> -    if (I) {
> -      unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
> -      Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy 
> ());
> -      BytesLeft = I->getValue() % UBytes;
> -    } else {
> -      assert(AVT >= MVT::i32 &&
> -             "Do not use rep;movs if not at least DWORD aligned");
> -      Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
> -                          Op.getOperand(3), DAG.getConstant(2,  
> MVT::i8));
> -      TwoRepMovs = true;
> -    }
> -  }
> +  unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
> +  SDOperand Count = DAG.getConstant(Size / UBytes, getPointerTy());
> +  BytesLeft = Size % UBytes;
>
>   SDOperand InFlag(0, 0);
>   Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX :  
> X86::ECX,
>                             Count, InFlag);
>   InFlag = Chain.getValue(1);
>   Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI :  
> X86::EDI,
> -                            Op.getOperand(1), InFlag);
> +                            Dest, InFlag);
>   InFlag = Chain.getValue(1);
>   Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI :  
> X86::ESI,
> -                            Op.getOperand(2), InFlag);
> +                            Source, InFlag);
>   InFlag = Chain.getValue(1);
>
>   SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
> @@ -4264,27 +4279,12 @@
>   Ops.push_back(InFlag);
>   Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
>
> -  if (TwoRepMovs) {
> -    InFlag = Chain.getValue(1);
> -    Count = Op.getOperand(3);
> -    MVT::ValueType CVT = Count.getValueType();
> -    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
> -                               DAG.getConstant((AVT == MVT::i64) ?  
> 7 : 3, CVT));
> -    Chain  = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX :  
> X86::ECX,
> -                              Left, InFlag);
> -    InFlag = Chain.getValue(1);
> -    Tys = DAG.getVTList(MVT::Other, MVT::Flag);
> -    Ops.clear();
> -    Ops.push_back(Chain);
> -    Ops.push_back(DAG.getValueType(MVT::i8));
> -    Ops.push_back(InFlag);
> -    Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
> -  } else if (BytesLeft) {
> +  if (BytesLeft) {
>     // Issue loads and stores for the last 1 - 7 bytes.
> -    unsigned Offset = I->getValue() - BytesLeft;
> -    SDOperand DstAddr = Op.getOperand(1);
> +    unsigned Offset = Size - BytesLeft;
> +    SDOperand DstAddr = Dest;
>     MVT::ValueType DstVT = DstAddr.getValueType();
> -    SDOperand SrcAddr = Op.getOperand(2);
> +    SDOperand SrcAddr = Source;
>     MVT::ValueType SrcVT = SrcAddr.getValueType();
>     SDOperand Value;
>     if (BytesLeft >= 4) {
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=42433&r1=42432&r2=42433&view=diff
>
> === 
> === 
> === 
> =====================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Fri Sep 28 07:53:01  
> 2007
> @@ -437,6 +437,12 @@
>     SDOperand LowerBRCOND(SDOperand Op, SelectionDAG &DAG);
>     SDOperand LowerBRCOND_New(SDOperand Op, SelectionDAG &DAG);
>     SDOperand LowerMEMSET(SDOperand Op, SelectionDAG &DAG);
> +    SDOperand LowerMEMCPYInline(SDOperand Dest, SDOperand Source,
> +                                SDOperand Chain, unsigned Size,  
> unsigned Align,
> +                                SelectionDAG &DAG);
> +    SDOperand LowerMEMCPYCall(SDOperand ChainOp, SDOperand DestOp,
> +                              SDOperand SourceOp, SDOperand CountOp,
> +                              SelectionDAG &DAG);
>     SDOperand LowerMEMCPY(SDOperand Op, SelectionDAG &DAG);
>     SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG);
>     SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG);
>
> Added: llvm/trunk/test/CodeGen/X86/memcpy.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memcpy.ll?rev=42433&view=auto
>
> === 
> === 
> === 
> =====================================================================
> --- llvm/trunk/test/CodeGen/X86/memcpy.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/memcpy.ll Fri Sep 28 07:53:01 2007
> @@ -0,0 +1,17 @@
> +; RUN: llvm-as < %s | llc -march=x86-64 | grep call.*memcpy | count 2
> +
> +declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
> +
> +define i8* @my_memcpy(i8* %a, i8* %b, i64 %n) {
> +entry:
> +    tail call void @llvm.memcpy.i64( i8* %a, i8* %b, i64 %n, i32 1 )
> +    ret i8* %a
> +}
> +
> +define i8* @my_memcpy2(i64* %a, i64* %b, i64 %n) {
> +entry:
> +    %tmp14 = bitcast i64* %a to i8*
> +    %tmp25 = bitcast i64* %b to i8*
> +    tail call void @llvm.memcpy.i64(i8* %tmp14, i8* %tmp25, i64 %n,  
> i32 8 )
> +    ret i8* %tmp14
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits



More information about the llvm-commits mailing list