[llvm-commits] [llvm] r42433 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h test/CodeGen/X86/memcpy.ll

Chris Lattner clattner at apple.com
Fri Sep 28 10:02:49 PDT 2007


On Sep 28, 2007, at 8:36 AM, Evan Cheng wrote:
> Are you sure this is better? Did you do any measurement? Our goal
> isn't to match gcc output. :)

I think this behavior makes sense.  If the size is variable, it could
be arbitrarily large.  We should assume that memcpy (the library
implementation) is tuned as well as possible for handling the
unknown-size case.

-Chris

> Perhaps you can add some unit tests to llvm-test?
>
> Thanks,
>
> Evan
>
> On Sep 28, 2007, at 5:53 AM, Rafael Espindola
> <rafael.espindola at gmail.com> wrote:
>
>> Author: rafael
>> Date: Fri Sep 28 07:53:01 2007
>> New Revision: 42433
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=42433&view=rev
>> Log:
>> Refactor the memcpy lowering for the x86 target.
>>
>> The only generated code difference is that now we call memcpy when
>> the size of the array is unknown. This matches GCC behavior and is
>> better since the run time value can be arbitrarily large.
>>
>>
>> Added:
>>    llvm/trunk/test/CodeGen/X86/memcpy.ll
>> Modified:
>>    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>>    llvm/trunk/lib/Target/X86/X86ISelLowering.h
>>
>> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=42433&r1=42432&r2=42433&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Sep 28
>> 07:53:01 2007
>> @@ -4188,35 +4188,61 @@
>> }
>>
>> SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG
>> &DAG) {
>> -  SDOperand Chain = Op.getOperand(0);
>> -  unsigned Align =
>> -    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
>> +  SDOperand ChainOp = Op.getOperand(0);
>> +  SDOperand DestOp = Op.getOperand(1);
>> +  SDOperand SourceOp = Op.getOperand(2);
>> +  SDOperand CountOp = Op.getOperand(3);
>> +  SDOperand AlignOp = Op.getOperand(4);
>> +  unsigned Align = (unsigned)cast<ConstantSDNode>(AlignOp)->getValue
>> ();
>>   if (Align == 0) Align = 1;
>>
>> -  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
>> -  // If not DWORD aligned or size is more than the threshold, call
>> memcpy.
>> -  // The libc version is likely to be faster for these cases. It
>> can use the
>> -  // address value and run time information about the CPU.
>> +  // The libc version is likely to be faster for the following
>> cases. It can
>> +  // use the address value and run time information about the CPU.
>>   // With glibc 2.6.1 on a core 2, coping an array of 100M longs was
>> 30% faster
>> -  if ((Align & 3) != 0 ||
>> -      (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold
>> ())) {
>> -    MVT::ValueType IntPtr = getPointerTy();
>> -    TargetLowering::ArgListTy Args;
>> -    TargetLowering::ArgListEntry Entry;
>> -    Entry.Ty = getTargetData()->getIntPtrType();
>> -    Entry.Node = Op.getOperand(1); Args.push_back(Entry);
>> -    Entry.Node = Op.getOperand(2); Args.push_back(Entry);
>> -    Entry.Node = Op.getOperand(3); Args.push_back(Entry);
>> -    std::pair<SDOperand,SDOperand> CallResult =
>> +
>> +  // If not DWORD aligned, call memcpy.
>> +  if ((Align & 3) != 0)
>> +    return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
>> +
>> +  // If size is unknown, call memcpy.
>> +  ConstantSDNode *I = dyn_cast<ConstantSDNode>(CountOp);
>> +  if (!I)
>> +    return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
>> +
>> +  // If size is more than the threshold, call memcpy.
>> +  unsigned Size = I->getValue();
>> +  if (Size > Subtarget->getMinRepStrSizeThreshold())
>> +    return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
>> +
>> +  return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align,
>> DAG);
>> +}
>> +
>> +SDOperand X86TargetLowering::LowerMEMCPYCall(SDOperand Chain,
>> +                                             SDOperand Dest,
>> +                                             SDOperand Source,
>> +                                             SDOperand Count,
>> +                                             SelectionDAG &DAG) {
>> +  MVT::ValueType IntPtr = getPointerTy();
>> +  TargetLowering::ArgListTy Args;
>> +  TargetLowering::ArgListEntry Entry;
>> +  Entry.Ty = getTargetData()->getIntPtrType();
>> +  Entry.Node = Dest; Args.push_back(Entry);
>> +  Entry.Node = Source; Args.push_back(Entry);
>> +  Entry.Node = Count; Args.push_back(Entry);
>> +  std::pair<SDOperand,SDOperand> CallResult =
>>       LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C,
>> false,
>>                   DAG.getExternalSymbol("memcpy", IntPtr), Args,  
>> DAG);
>> -    return CallResult.second;
>> -  }
>> +  return CallResult.second;
>> +}
>>
>> +SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain,
>> +                                               SDOperand Dest,
>> +                                               SDOperand Source,
>> +                                               unsigned Size,
>> +                                               unsigned Align,
>> +                                               SelectionDAG &DAG) {
>>   MVT::ValueType AVT;
>> -  SDOperand Count;
>>   unsigned BytesLeft = 0;
>> -  bool TwoRepMovs = false;
>>   switch (Align & 3) {
>>     case 2:   // WORD aligned
>>       AVT = MVT::i16;
>> @@ -4228,33 +4254,22 @@
>>       break;
>>     default:  // Byte aligned
>>       AVT = MVT::i8;
>> -      Count = Op.getOperand(3);
>>       break;
>>   }
>>
>> -  if (AVT > MVT::i8) {
>> -    if (I) {
>> -      unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
>> -      Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy
>> ());
>> -      BytesLeft = I->getValue() % UBytes;
>> -    } else {
>> -      assert(AVT >= MVT::i32 &&
>> -             "Do not use rep;movs if not at least DWORD aligned");
>> -      Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
>> -                          Op.getOperand(3), DAG.getConstant(2,
>> MVT::i8));
>> -      TwoRepMovs = true;
>> -    }
>> -  }
>> +  unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
>> +  SDOperand Count = DAG.getConstant(Size / UBytes, getPointerTy());
>> +  BytesLeft = Size % UBytes;
>>
>>   SDOperand InFlag(0, 0);
>>   Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX :
>> X86::ECX,
>>                             Count, InFlag);
>>   InFlag = Chain.getValue(1);
>>   Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI :
>> X86::EDI,
>> -                            Op.getOperand(1), InFlag);
>> +                            Dest, InFlag);
>>   InFlag = Chain.getValue(1);
>>   Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI :
>> X86::ESI,
>> -                            Op.getOperand(2), InFlag);
>> +                            Source, InFlag);
>>   InFlag = Chain.getValue(1);
>>
>>   SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
>> @@ -4264,27 +4279,12 @@
>>   Ops.push_back(InFlag);
>>   Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
>>
>> -  if (TwoRepMovs) {
>> -    InFlag = Chain.getValue(1);
>> -    Count = Op.getOperand(3);
>> -    MVT::ValueType CVT = Count.getValueType();
>> -    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
>> -                               DAG.getConstant((AVT == MVT::i64) ?
>> 7 : 3, CVT));
>> -    Chain  = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX :
>> X86::ECX,
>> -                              Left, InFlag);
>> -    InFlag = Chain.getValue(1);
>> -    Tys = DAG.getVTList(MVT::Other, MVT::Flag);
>> -    Ops.clear();
>> -    Ops.push_back(Chain);
>> -    Ops.push_back(DAG.getValueType(MVT::i8));
>> -    Ops.push_back(InFlag);
>> -    Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
>> -  } else if (BytesLeft) {
>> +  if (BytesLeft) {
>>     // Issue loads and stores for the last 1 - 7 bytes.
>> -    unsigned Offset = I->getValue() - BytesLeft;
>> -    SDOperand DstAddr = Op.getOperand(1);
>> +    unsigned Offset = Size - BytesLeft;
>> +    SDOperand DstAddr = Dest;
>>     MVT::ValueType DstVT = DstAddr.getValueType();
>> -    SDOperand SrcAddr = Op.getOperand(2);
>> +    SDOperand SrcAddr = Source;
>>     MVT::ValueType SrcVT = SrcAddr.getValueType();
>>     SDOperand Value;
>>     if (BytesLeft >= 4) {
>>
>> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=42433&r1=42432&r2=42433&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
>> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Fri Sep 28 07:53:01
>> 2007
>> @@ -437,6 +437,12 @@
>>     SDOperand LowerBRCOND(SDOperand Op, SelectionDAG &DAG);
>>     SDOperand LowerBRCOND_New(SDOperand Op, SelectionDAG &DAG);
>>     SDOperand LowerMEMSET(SDOperand Op, SelectionDAG &DAG);
>> +    SDOperand LowerMEMCPYInline(SDOperand Dest, SDOperand Source,
>> +                                SDOperand Chain, unsigned Size,
>> unsigned Align,
>> +                                SelectionDAG &DAG);
>> +    SDOperand LowerMEMCPYCall(SDOperand ChainOp, SDOperand DestOp,
>> +                              SDOperand SourceOp, SDOperand CountOp,
>> +                              SelectionDAG &DAG);
>>     SDOperand LowerMEMCPY(SDOperand Op, SelectionDAG &DAG);
>>     SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG);
>>     SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG);
>>
>> Added: llvm/trunk/test/CodeGen/X86/memcpy.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memcpy.ll?rev=42433&view=auto
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/memcpy.ll (added)
>> +++ llvm/trunk/test/CodeGen/X86/memcpy.ll Fri Sep 28 07:53:01 2007
>> @@ -0,0 +1,17 @@
>> +; RUN: llvm-as < %s | llc -march=x86-64 | grep call.*memcpy |  
>> count 2
>> +
>> +declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
>> +
>> +define i8* @my_memcpy(i8* %a, i8* %b, i64 %n) {
>> +entry:
>> +    tail call void @llvm.memcpy.i64( i8* %a, i8* %b, i64 %n, i32 1 )
>> +    ret i8* %a
>> +}
>> +
>> +define i8* @my_memcpy2(i64* %a, i64* %b, i64 %n) {
>> +entry:
>> +    %tmp14 = bitcast i64* %a to i8*
>> +    %tmp25 = bitcast i64* %b to i8*
>> +    tail call void @llvm.memcpy.i64(i8* %tmp14, i8* %tmp25, i64 %n,
>> i32 8 )
>> +    ret i8* %tmp14
>> +}
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list