[llvm-commits] [llvm] r49572 - in /llvm/trunk: include/llvm/CodeGen/ include/llvm/Target/ lib/CodeGen/SelectionDAG/ lib/Target/ARM/ lib/Target/Alpha/ lib/Target/CellSPU/ lib/Target/IA64/ lib/Target/Mips/ lib/Target/PowerPC/ lib/Target/Sparc/ lib/Target/X86/ test/CodeGen/X86/
Dan Gohman
gohman at apple.com
Mon Apr 14 12:57:50 PDT 2008
I haven't seen any. ISD::MEMCPY wasn't ever being dagcombined, and
it was pretty conservative with its chain edges, so target-specific
nodes aren't very different.
Dan
On Apr 14, 2008, at 11:37 AM, Evan Cheng wrote:
> Thanks. My only concern is that lowering these into target-specific
> code early might potentially inhibit some optimizations. Do you see
> that happening at all?
>
> Evan
>
> On Apr 11, 2008, at 9:36 PM, Dan Gohman wrote:
>
>> Author: djg
>> Date: Fri Apr 11 23:36:06 2008
>> New Revision: 49572
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=49572&view=rev
>> Log:
>> Drop ISD::MEMSET, ISD::MEMMOVE, and ISD::MEMCPY, which are not Legal
>> on any current target and aren't optimized in DAGCombiner. Instead
>> of using intermediate nodes, expand the operations, choosing between
>> simple loads/stores, target-specific code, and library calls,
>> immediately.
>>
>> Previously, the code to emit optimized code for these operations
>> was only used at initial SelectionDAG construction time; now it is
>> used at all times. This fixes some cases where rep;movs was being
>> used for small copies where simple loads/stores would be better.
>>
>> This also cleans up code that checks for alignments less than 4;
>> let the targets make that decision instead of doing it in
>> target-independent code. This allows x86 to use rep;movs in
>> low-alignment cases.
>>
>> Also, this fixes a bug that resulted in the use of rep;stos for
>> memsets of 0 with non-constant memory size when the alignment was
>> at least 4. It's better to use the library in this case, which
>> can be significantly faster when the size is large.
>>
>> This also preserves more SourceValue information when memory
>> intrinsics are lowered into simple loads/stores.
>>
>> Added:
>> llvm/trunk/test/CodeGen/X86/small-byval-memcpy.ll
>> llvm/trunk/test/CodeGen/X86/variable-sized-darwin-bzero.ll
>> Modified:
>> llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
>> llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h
>> llvm/trunk/include/llvm/Target/TargetLowering.h
>> llvm/trunk/include/llvm/Target/TargetSubtarget.h
>> llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
>> llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
>> llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h
>> llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypesExpand.cpp
>> llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypesPromote.cpp
>> llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
>> llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
>> llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
>> llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
>> llvm/trunk/lib/Target/ARM/ARMISelLowering.h
>> llvm/trunk/lib/Target/ARM/ARMSubtarget.h
>> llvm/trunk/lib/Target/Alpha/AlphaISelLowering.cpp
>> llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp
>> llvm/trunk/lib/Target/IA64/IA64ISelLowering.cpp
>> llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp
>> llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
>> llvm/trunk/lib/Target/Sparc/SparcISelLowering.cpp
>> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>> llvm/trunk/lib/Target/X86/X86ISelLowering.h
>> llvm/trunk/test/CodeGen/X86/2004-02-12-Memcpy.llx
>> llvm/trunk/test/CodeGen/X86/byval2.ll
>> llvm/trunk/test/CodeGen/X86/byval3.ll
>> llvm/trunk/test/CodeGen/X86/byval4.ll
>> llvm/trunk/test/CodeGen/X86/byval5.ll
>> llvm/trunk/test/CodeGen/X86/byval7.ll
>>
>> Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAG.h?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h (original)
>> +++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Fri Apr 11
>> 23:36:06 2008
>> @@ -323,17 +323,20 @@
>> SDOperand getNode(unsigned Opcode, SDVTList VTs,
>> const SDOperand *Ops, unsigned NumOps);
>>
>> - SDOperand getMemcpy(SDOperand Chain, SDOperand Dest, SDOperand
>> Src,
>> - SDOperand Size, SDOperand Align,
>> - SDOperand AlwaysInline);
>> -
>> - SDOperand getMemmove(SDOperand Chain, SDOperand Dest, SDOperand
>> Src,
>> - SDOperand Size, SDOperand Align,
>> - SDOperand AlwaysInline);
>> -
>> - SDOperand getMemset(SDOperand Chain, SDOperand Dest, SDOperand
>> Src,
>> - SDOperand Size, SDOperand Align,
>> - SDOperand AlwaysInline);
>> + SDOperand getMemcpy(SDOperand Chain, SDOperand Dst, SDOperand Src,
>> + SDOperand Size, unsigned Align,
>> + bool AlwaysInline,
>> + Value *DstSV, uint64_t DstOff,
>> + Value *SrcSV, uint64_t SrcOff);
>> +
>> + SDOperand getMemmove(SDOperand Chain, SDOperand Dst, SDOperand
>> Src,
>> + SDOperand Size, unsigned Align,
>> + Value *DstSV, uint64_t DstOff,
>> + Value *SrcSV, uint64_t SrcOff);
>> +
>> + SDOperand getMemset(SDOperand Chain, SDOperand Dst, SDOperand Src,
>> + SDOperand Size, unsigned Align,
>> + Value *DstSV, uint64_t DstOff);
>>
>> /// getSetCC - Helper function to make it easier to build SetCC's
>> if you just
>> /// have an ISD::CondCode instead of an SDOperand.
>>
>> Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h (original)
>> +++ llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h Fri Apr 11
>> 23:36:06 2008
>> @@ -497,14 +497,6 @@
>> // it returns an output chain.
>> STACKRESTORE,
>>
>> - // MEMSET/MEMCPY/MEMMOVE - The first operand is the chain. The
>> following
>> - // correspond to the operands of the LLVM intrinsic functions
>> and the last
>> - // one is AlwaysInline. The only result is a token chain. The
>> alignment
>> - // argument is guaranteed to be a Constant node.
>> - MEMSET,
>> - MEMMOVE,
>> - MEMCPY,
>> -
>> // CALLSEQ_START/CALLSEQ_END - These operators mark the
>> beginning and end of
>> // a call sequence, and carry arbitrary information that target
>> might want
>> // to know. The first operand is a chain, the rest are
>> specified by the
>>
>> Modified: llvm/trunk/include/llvm/Target/TargetLowering.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/include/llvm/Target/TargetLowering.h (original)
>> +++ llvm/trunk/include/llvm/Target/TargetLowering.h Fri Apr 11
>> 23:36:06 2008
>> @@ -948,17 +948,60 @@
>> SDOperand Callee, ArgListTy &Args, SelectionDAG &DAG);
>>
>>
>> - virtual SDOperand LowerMEMCPY(SDOperand Op, SelectionDAG &DAG);
>> - virtual SDOperand LowerMEMCPYCall(SDOperand Chain, SDOperand Dest,
>> - SDOperand Source, SDOperand
>> Count,
>> - SelectionDAG &DAG);
>> - virtual SDOperand LowerMEMCPYInline(SDOperand Chain, SDOperand
>> Dest,
>> - SDOperand Source, unsigned
>> Size,
>> - unsigned Align, SelectionDAG
>> &DAG) {
>> - assert(0 && "Not Implemented");
>> - return SDOperand(); // this is here to silence compiler errors
>> + /// EmitTargetCodeForMemcpy - Emit target-specific code that
>> performs a
>> + /// memcpy. This can be used by targets to provide code sequences
>> for cases
>> + /// that don't fit the target's parameters for simple loads/
>> stores and can be
>> + /// more efficient than using a library call. This function can
>> return a null
>> + /// SDOperand if the target declines to use inline code and a
>> different
>> + /// lowering strategy should be used.
>> + ///
>> + /// If AlwaysInline is true, the size is constant and the target
>> should not
>> + /// emit any calls and is strongly encouraged to attempt to emit
>> inline code
>> + /// even if it is beyond the usual threshold because this
>> intrinsic is being
>> + /// expanded in a place where calls are not feasible (e.g. within
>> the prologue
>> + /// for another call). If the target chooses to decline an
>> AlwaysInline
>> + /// request here, legalize will resort to using simple loads and
>> stores.
>> + virtual SDOperand
>> + EmitTargetCodeForMemcpy(SelectionDAG &DAG,
>> + SDOperand Chain,
>> + SDOperand Op1, SDOperand Op2,
>> + SDOperand Op3, unsigned Align,
>> + bool AlwaysInline,
>> + Value *DstSV, uint64_t DstOff,
>> + Value *SrcSV, uint64_t SrcOff) {
>> + return SDOperand();
>> }
>>
>> + /// EmitTargetCodeForMemmove - Emit target-specific code that
>> performs a
>> + /// memmove. This can be used by targets to provide code
>> sequences for cases
>> + /// that don't fit the target's parameters for simple loads/
>> stores and can be
>> + /// more efficient than using a library call. This function can
>> return a null
>> + /// SDOperand if the target declines to use code and a different
>> lowering
>> + /// strategy should be used.
>> + virtual SDOperand
>> + EmitTargetCodeForMemmove(SelectionDAG &DAG,
>> + SDOperand Chain,
>> + SDOperand Op1, SDOperand Op2,
>> + SDOperand Op3, unsigned Align,
>> + Value *DstSV, uint64_t DstOff,
>> + Value *SrcSV, uint64_t SrcOff) {
>> + return SDOperand();
>> + }
>> +
>> + /// EmitTargetCodeForMemset - Emit target-specific code that
>> performs a
>> + /// memset. This can be used by targets to provide code sequences
>> for cases
>> + /// that don't fit the target's parameters for simple stores and
>> can be more
>> + /// efficient than using a library call. This function can return
>> a null
>> + /// SDOperand if the target declines to use code and a different
>> lowering
>> + /// strategy should be used.
>> + virtual SDOperand
>> + EmitTargetCodeForMemset(SelectionDAG &DAG,
>> + SDOperand Chain,
>> + SDOperand Op1, SDOperand Op2,
>> + SDOperand Op3, unsigned Align,
>> + Value *DstSV, uint64_t DstOff) {
>> + return SDOperand();
>> + }
>>
>> /// LowerOperation - This callback is invoked for operations that
>> are
>> /// unsupported by the target, which are registered to use
>> 'custom' lowering,
>>
>> Modified: llvm/trunk/include/llvm/Target/TargetSubtarget.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetSubtarget.h?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/include/llvm/Target/TargetSubtarget.h (original)
>> +++ llvm/trunk/include/llvm/Target/TargetSubtarget.h Fri Apr 11
>> 23:36:06 2008
>> @@ -28,9 +28,6 @@
>> protected: // Can only create subclasses...
>> TargetSubtarget();
>> public:
>> - /// getMaxInlineSizeThreshold - Returns the maximum memset /
>> memcpy size
>> - /// that still makes it profitable to inline the call.
>> - virtual unsigned getMaxInlineSizeThreshold() const {return 0; }
>> virtual ~TargetSubtarget();
>> };
>>
>>
>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Fri Apr 11
>> 23:36:06 2008
>> @@ -22,6 +22,7 @@
>> #include "llvm/Target/TargetData.h"
>> #include "llvm/Target/TargetMachine.h"
>> #include "llvm/Target/TargetOptions.h"
>> +#include "llvm/Target/TargetSubtarget.h"
>> #include "llvm/CallingConv.h"
>> #include "llvm/Constants.h"
>> #include "llvm/DerivedTypes.h"
>> @@ -2842,123 +2843,6 @@
>> break;
>> }
>> break;
>> - case ISD::MEMSET:
>> - case ISD::MEMCPY:
>> - case ISD::MEMMOVE: {
>> - Tmp1 = LegalizeOp(Node->getOperand(0)); // Chain
>> - Tmp2 = LegalizeOp(Node->getOperand(1)); // Pointer
>> -
>> - if (Node->getOpcode() == ISD::MEMSET) { // memset = ubyte
>> - switch (getTypeAction(Node->getOperand(2).getValueType())) {
>> - case Expand: assert(0 && "Cannot expand a byte!");
>> - case Legal:
>> - Tmp3 = LegalizeOp(Node->getOperand(2));
>> - break;
>> - case Promote:
>> - Tmp3 = PromoteOp(Node->getOperand(2));
>> - break;
>> - }
>> - } else {
>> - Tmp3 = LegalizeOp(Node->getOperand(2)); // memcpy/move =
>> pointer,
>> - }
>> -
>> - SDOperand Tmp4;
>> - switch (getTypeAction(Node->getOperand(3).getValueType())) {
>> - case Expand: {
>> - // Length is too big, just take the lo-part of the length.
>> - SDOperand HiPart;
>> - ExpandOp(Node->getOperand(3), Tmp4, HiPart);
>> - break;
>> - }
>> - case Legal:
>> - Tmp4 = LegalizeOp(Node->getOperand(3));
>> - break;
>> - case Promote:
>> - Tmp4 = PromoteOp(Node->getOperand(3));
>> - break;
>> - }
>> -
>> - SDOperand Tmp5;
>> - switch (getTypeAction(Node->getOperand(4).getValueType()))
>> { // uint
>> - case Expand: assert(0 && "Cannot expand this yet!");
>> - case Legal:
>> - Tmp5 = LegalizeOp(Node->getOperand(4));
>> - break;
>> - case Promote:
>> - Tmp5 = PromoteOp(Node->getOperand(4));
>> - break;
>> - }
>> -
>> - SDOperand Tmp6;
>> - switch (getTypeAction(Node->getOperand(5).getValueType()))
>> { // bool
>> - case Expand: assert(0 && "Cannot expand this yet!");
>> - case Legal:
>> - Tmp6 = LegalizeOp(Node->getOperand(5));
>> - break;
>> - case Promote:
>> - Tmp6 = PromoteOp(Node->getOperand(5));
>> - break;
>> - }
>> -
>> - switch (TLI.getOperationAction(Node->getOpcode(), MVT::Other)) {
>> - default: assert(0 && "This action not implemented for this
>> operation!");
>> - case TargetLowering::Custom:
>> - isCustom = true;
>> - // FALLTHROUGH
>> - case TargetLowering::Legal: {
>> - SDOperand Ops[] = { Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6 };
>> - Result = DAG.UpdateNodeOperands(Result, Ops, 6);
>> - if (isCustom) {
>> - Tmp1 = TLI.LowerOperation(Result, DAG);
>> - if (Tmp1.Val) Result = Tmp1;
>> - }
>> - break;
>> - }
>> - case TargetLowering::Expand: {
>> - // Otherwise, the target does not support this operation.
>> Lower the
>> - // operation to an explicit libcall as appropriate.
>> - MVT::ValueType IntPtr = TLI.getPointerTy();
>> - const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType();
>> - TargetLowering::ArgListTy Args;
>> - TargetLowering::ArgListEntry Entry;
>> -
>> - const char *FnName = 0;
>> - if (Node->getOpcode() == ISD::MEMSET) {
>> - Entry.Node = Tmp2; Entry.Ty = IntPtrTy;
>> - Args.push_back(Entry);
>> - // Extend the (previously legalized) ubyte argument to be
>> an int value
>> - // for the call.
>> - if (Tmp3.getValueType() > MVT::i32)
>> - Tmp3 = DAG.getNode(ISD::TRUNCATE, MVT::i32, Tmp3);
>> - else
>> - Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Tmp3);
>> - Entry.Node = Tmp3; Entry.Ty = Type::Int32Ty; Entry.isSExt =
>> true;
>> - Args.push_back(Entry);
>> - Entry.Node = Tmp4; Entry.Ty = IntPtrTy; Entry.isSExt =
>> false;
>> - Args.push_back(Entry);
>> -
>> - FnName = "memset";
>> - } else if (Node->getOpcode() == ISD::MEMCPY ||
>> - Node->getOpcode() == ISD::MEMMOVE) {
>> - Entry.Ty = IntPtrTy;
>> - Entry.Node = Tmp2; Args.push_back(Entry);
>> - Entry.Node = Tmp3; Args.push_back(Entry);
>> - Entry.Node = Tmp4; Args.push_back(Entry);
>> - FnName = Node->getOpcode() == ISD::MEMMOVE ? "memmove" :
>> "memcpy";
>> - } else {
>> - assert(0 && "Unknown op!");
>> - }
>> -
>> - std::pair<SDOperand,SDOperand> CallResult =
>> - TLI.LowerCallTo(Tmp1, Type::VoidTy,
>> - false, false, false, CallingConv::C, false,
>> - DAG.getExternalSymbol(FnName, IntPtr),
>> Args, DAG);
>> - Result = CallResult.second;
>> - break;
>> - }
>> - }
>> - break;
>> - }
>>
>> case ISD::SHL_PARTS:
>> case ISD::SRA_PARTS:
>>
>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp Fri Apr 11
>> 23:36:06 2008
>> @@ -439,51 +439,6 @@
>> return DAG.getLoad(DestVT, Store, FIPtr, NULL, 0);
>> }
>>
>> -/// HandleMemIntrinsic - This handles memcpy/memset/memmove with
>> invalid
>> -/// operands. This promotes or expands the operands as required.
>> -SDOperand DAGTypeLegalizer::HandleMemIntrinsic(SDNode *N) {
>> - // The chain and pointer [operands #0 and #1] are always valid
>> types.
>> - SDOperand Chain = N->getOperand(0);
>> - SDOperand Ptr = N->getOperand(1);
>> - SDOperand Op2 = N->getOperand(2);
>> -
>> - // Op #2 is either a value (memset) or a pointer. Promote it if
>> required.
>> - switch (getTypeAction(Op2.getValueType())) {
>> - default: assert(0 && "Unknown action for pointer/value operand");
>> - case Legal: break;
>> - case Promote: Op2 = GetPromotedOp(Op2); break;
>> - }
>> -
>> - // The length could have any action required.
>> - SDOperand Length = N->getOperand(3);
>> - switch (getTypeAction(Length.getValueType())) {
>> - default: assert(0 && "Unknown action for memop operand");
>> - case Legal: break;
>> - case Promote: Length = GetPromotedZExtOp(Length); break;
>> - case Expand:
>> - SDOperand Dummy; // discard the high part.
>> - GetExpandedOp(Length, Length, Dummy);
>> - break;
>> - }
>> -
>> - SDOperand Align = N->getOperand(4);
>> - switch (getTypeAction(Align.getValueType())) {
>> - default: assert(0 && "Unknown action for memop operand");
>> - case Legal: break;
>> - case Promote: Align = GetPromotedZExtOp(Align); break;
>> - }
>> -
>> - SDOperand AlwaysInline = N->getOperand(5);
>> - switch (getTypeAction(AlwaysInline.getValueType())) {
>> - default: assert(0 && "Unknown action for memop operand");
>> - case Legal: break;
>> - case Promote: AlwaysInline = GetPromotedZExtOp(AlwaysInline);
>> break;
>> - }
>> -
>> - SDOperand Ops[] = { Chain, Ptr, Op2, Length, Align,
>> AlwaysInline };
>> - return DAG.UpdateNodeOperands(SDOperand(N, 0), Ops, 6);
>> -}
>> -
>> /// JoinIntegers - Build an integer with low bits Lo and high bits
>> Hi.
>> SDOperand DAGTypeLegalizer::JoinIntegers(SDOperand Lo, SDOperand
>> Hi) {
>> MVT::ValueType LVT = Lo.getValueType();
>>
>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h (original)
>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h Fri Apr 11
>> 23:36:06 2008
>> @@ -165,7 +165,6 @@
>> // Common routines.
>> SDOperand BitConvertToInteger(SDOperand Op);
>> SDOperand CreateStackStoreLoad(SDOperand Op, MVT::ValueType DestVT);
>> - SDOperand HandleMemIntrinsic(SDNode *N);
>> SDOperand JoinIntegers(SDOperand Lo, SDOperand Hi);
>> void SplitInteger(SDOperand Op, SDOperand &Lo, SDOperand &Hi);
>> void SplitInteger(SDOperand Op, MVT::ValueType LoVT,
>> MVT::ValueType HiVT,
>>
>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypesExpand.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypesExpand.cpp?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypesExpand.cpp
>> (original)
>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypesExpand.cpp Fri
>> Apr 11 23:36:06 2008
>> @@ -946,9 +946,6 @@
>> case ISD::STORE:
>> Res = ExpandOperand_STORE(cast<StoreSDNode>(N), OpNo);
>> break;
>> - case ISD::MEMSET:
>> - case ISD::MEMCPY:
>> - case ISD::MEMMOVE: Res = HandleMemIntrinsic(N); break;
>>
>> case ISD::BUILD_VECTOR: Res = ExpandOperand_BUILD_VECTOR(N);
>> break;
>> }
>>
>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypesPromote.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypesPromote.cpp?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypesPromote.cpp
>> (original)
>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypesPromote.cpp Fri
>> Apr 11 23:36:06 2008
>> @@ -447,9 +447,6 @@
>>
>> case ISD::STORE: Res =
>> PromoteOperand_STORE(cast<StoreSDNode>(N),
>> OpNo); break;
>> - case ISD::MEMSET:
>> - case ISD::MEMCPY:
>> - case ISD::MEMMOVE: Res = HandleMemIntrinsic(N); break;
>>
>> case ISD::BUILD_VECTOR: Res = PromoteOperand_BUILD_VECTOR(N); break;
>> case ISD::INSERT_VECTOR_ELT:
>>
>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Fri Apr 11
>> 23:36:06 2008
>> @@ -17,6 +17,7 @@
>> #include "llvm/Intrinsics.h"
>> #include "llvm/DerivedTypes.h"
>> #include "llvm/Assembly/Writer.h"
>> +#include "llvm/CallingConv.h"
>> #include "llvm/CodeGen/MachineBasicBlock.h"
>> #include "llvm/CodeGen/MachineConstantPool.h"
>> #include "llvm/CodeGen/MachineFrameInfo.h"
>> @@ -2385,28 +2386,357 @@
>> return getNode(Opcode, VT, Ops, 5);
>> }
>>
>> -SDOperand SelectionDAG::getMemcpy(SDOperand Chain, SDOperand Dest,
>> - SDOperand Src, SDOperand Size,
>> - SDOperand Align,
>> - SDOperand AlwaysInline) {
>> - SDOperand Ops[] = { Chain, Dest, Src, Size, Align, AlwaysInline };
>> - return getNode(ISD::MEMCPY, MVT::Other, Ops, 6);
>> +/// getMemsetValue - Vectorized representation of the memset value
>> +/// operand.
>> +static SDOperand getMemsetValue(SDOperand Value, MVT::ValueType VT,
>> + SelectionDAG &DAG) {
>> + MVT::ValueType CurVT = VT;
>> + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
>> + uint64_t Val = C->getValue() & 255;
>> + unsigned Shift = 8;
>> + while (CurVT != MVT::i8) {
>> + Val = (Val << Shift) | Val;
>> + Shift <<= 1;
>> + CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
>> + }
>> + return DAG.getConstant(Val, VT);
>> + } else {
>> + Value = DAG.getNode(ISD::ZERO_EXTEND, VT, Value);
>> + unsigned Shift = 8;
>> + while (CurVT != MVT::i8) {
>> + Value =
>> + DAG.getNode(ISD::OR, VT,
>> + DAG.getNode(ISD::SHL, VT, Value,
>> + DAG.getConstant(Shift, MVT::i8)),
>> Value);
>> + Shift <<= 1;
>> + CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
>> + }
>> +
>> + return Value;
>> + }
>> }
>>
>> -SDOperand SelectionDAG::getMemmove(SDOperand Chain, SDOperand Dest,
>> +/// getMemsetStringVal - Similar to getMemsetValue. Except this is
>> only
>> +/// used when a memcpy is turned into a memset when the source is a
>> constant
>> +/// string ptr.
>> +static SDOperand getMemsetStringVal(MVT::ValueType VT,
>> + SelectionDAG &DAG,
>> + const TargetLowering &TLI,
>> + std::string &Str, unsigned
>> Offset) {
>> + uint64_t Val = 0;
>> + unsigned MSB = MVT::getSizeInBits(VT) / 8;
>> + if (TLI.isLittleEndian())
>> + Offset = Offset + MSB - 1;
>> + for (unsigned i = 0; i != MSB; ++i) {
>> + Val = (Val << 8) | (unsigned char)Str[Offset];
>> + Offset += TLI.isLittleEndian() ? -1 : 1;
>> + }
>> + return DAG.getConstant(Val, VT);
>> +}
>> +
>> +/// getMemBasePlusOffset - Returns base and offset node for the
>> +static SDOperand getMemBasePlusOffset(SDOperand Base, unsigned
>> Offset,
>> + SelectionDAG &DAG) {
>> + MVT::ValueType VT = Base.getValueType();
>> + return DAG.getNode(ISD::ADD, VT, Base, DAG.getConstant(Offset,
>> VT));
>> +}
>> +
>> +/// MeetsMaxMemopRequirement - Determines if the number of memory
>> ops required
>> +/// to replace the memset / memcpy is below the threshold. It also
>> returns the
>> +/// types of the sequence of memory ops to perform memset / memcpy.
>> +static bool MeetsMaxMemopRequirement(std::vector<MVT::ValueType>
>> &MemOps,
>> + unsigned Limit, uint64_t Size,
>> + unsigned Align,
>> + const TargetLowering &TLI) {
>> + MVT::ValueType VT;
>> +
>> + if (TLI.allowsUnalignedMemoryAccesses()) {
>> + VT = MVT::i64;
>> + } else {
>> + switch (Align & 7) {
>> + case 0:
>> + VT = MVT::i64;
>> + break;
>> + case 4:
>> + VT = MVT::i32;
>> + break;
>> + case 2:
>> + VT = MVT::i16;
>> + break;
>> + default:
>> + VT = MVT::i8;
>> + break;
>> + }
>> + }
>> +
>> + MVT::ValueType LVT = MVT::i64;
>> + while (!TLI.isTypeLegal(LVT))
>> + LVT = (MVT::ValueType)((unsigned)LVT - 1);
>> + assert(MVT::isInteger(LVT));
>> +
>> + if (VT > LVT)
>> + VT = LVT;
>> +
>> + unsigned NumMemOps = 0;
>> + while (Size != 0) {
>> + unsigned VTSize = MVT::getSizeInBits(VT) / 8;
>> + while (VTSize > Size) {
>> + VT = (MVT::ValueType)((unsigned)VT - 1);
>> + VTSize >>= 1;
>> + }
>> + assert(MVT::isInteger(VT));
>> +
>> + if (++NumMemOps > Limit)
>> + return false;
>> + MemOps.push_back(VT);
>> + Size -= VTSize;
>> + }
>> +
>> + return true;
>> +}
>> +
>> +static SDOperand getMemcpyLoadsAndStores(SelectionDAG &DAG,
>> + SDOperand Chain, SDOperand
>> Dst,
>> + SDOperand Src, uint64_t
>> Size,
>> + unsigned Align,
>> + bool AlwaysInline,
>> + Value *DstSV, uint64_t
>> DstOff,
>> + Value *SrcSV, uint64_t
>> SrcOff) {
>> + const TargetLowering &TLI = DAG.getTargetLoweringInfo();
>> +
>> + // Expand memcpy to a series of store ops if the size operand
>> falls below
>> + // a certain threshold.
>> + std::vector<MVT::ValueType> MemOps;
>> + uint64_t Limit = -1;
>> + if (!AlwaysInline)
>> + Limit = TLI.getMaxStoresPerMemcpy();
>> + if (!MeetsMaxMemopRequirement(MemOps, Limit, Size, Align, TLI))
>> + return SDOperand();
>> +
>> + SmallVector<SDOperand, 8> OutChains;
>> +
>> + unsigned NumMemOps = MemOps.size();
>> + unsigned SrcDelta = 0;
>> + GlobalAddressSDNode *G = NULL;
>> + std::string Str;
>> + bool CopyFromStr = false;
>> +
>> + if (Src.getOpcode() == ISD::GlobalAddress)
>> + G = cast<GlobalAddressSDNode>(Src);
>> + else if (Src.getOpcode() == ISD::ADD &&
>> + Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
>> + Src.getOperand(1).getOpcode() == ISD::Constant) {
>> + G = cast<GlobalAddressSDNode>(Src.getOperand(0));
>> + SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getValue();
>> + }
>> + if (G) {
>> + GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
>> + if (GV && GV->isConstant()) {
>> + Str = GV->getStringValue(false);
>> + if (!Str.empty()) {
>> + CopyFromStr = true;
>> + SrcOff += SrcDelta;
>> + }
>> + }
>> + }
>> +
>> + for (unsigned i = 0; i < NumMemOps; i++) {
>> + MVT::ValueType VT = MemOps[i];
>> + unsigned VTSize = MVT::getSizeInBits(VT) / 8;
>> + SDOperand Value, Store;
>> +
>> + if (CopyFromStr) {
>> + Value = getMemsetStringVal(VT, DAG, TLI, Str, SrcOff);
>> + Store =
>> + DAG.getStore(Chain, Value,
>> + getMemBasePlusOffset(Dst, DstOff, DAG),
>> + DstSV, DstOff);
>> + } else {
>> + Value = DAG.getLoad(VT, Chain,
>> + getMemBasePlusOffset(Src, SrcOff, DAG),
>> + SrcSV, SrcOff, false, Align);
>> + Store =
>> + DAG.getStore(Chain, Value,
>> + getMemBasePlusOffset(Dst, DstOff, DAG),
>> + DstSV, DstOff, false, Align);
>> + }
>> + OutChains.push_back(Store);
>> + SrcOff += VTSize;
>> + DstOff += VTSize;
>> + }
>> +
>> + return DAG.getNode(ISD::TokenFactor, MVT::Other,
>> + &OutChains[0], OutChains.size());
>> +}
>> +
>> +static SDOperand getMemsetStores(SelectionDAG &DAG,
>> + SDOperand Chain, SDOperand Dst,
>> + SDOperand Src, uint64_t Size,
>> + unsigned Align,
>> + Value *DstSV, uint64_t DstOff) {
>> + const TargetLowering &TLI = DAG.getTargetLoweringInfo();
>> +
>> + // Expand memset to a series of load/store ops if the size operand
>> + // falls below a certain threshold.
>> + std::vector<MVT::ValueType> MemOps;
>> + if (!MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemset(),
>> + Size, Align, TLI))
>> + return SDOperand();
>> +
>> + SmallVector<SDOperand, 8> OutChains;
>> +
>> + unsigned NumMemOps = MemOps.size();
>> + for (unsigned i = 0; i < NumMemOps; i++) {
>> + MVT::ValueType VT = MemOps[i];
>> + unsigned VTSize = MVT::getSizeInBits(VT) / 8;
>> + SDOperand Value = getMemsetValue(Src, VT, DAG);
>> + SDOperand Store = DAG.getStore(Chain, Value,
>> + getMemBasePlusOffset(Dst,
>> DstOff, DAG),
>> + DstSV, DstOff);
>> + OutChains.push_back(Store);
>> + DstOff += VTSize;
>> + }
>> +
>> + return DAG.getNode(ISD::TokenFactor, MVT::Other,
>> + &OutChains[0], OutChains.size());
>> +}
>> +
>> +SDOperand SelectionDAG::getMemcpy(SDOperand Chain, SDOperand Dst,
>> SDOperand Src, SDOperand Size,
>> - SDOperand Align,
>> - SDOperand AlwaysInline) {
>> - SDOperand Ops[] = { Chain, Dest, Src, Size, Align, AlwaysInline };
>> - return getNode(ISD::MEMMOVE, MVT::Other, Ops, 6);
>> + unsigned Align, bool AlwaysInline,
>> + Value *DstSV, uint64_t DstOff,
>> + Value *SrcSV, uint64_t SrcOff) {
>> +
>> + // Check to see if we should lower the memcpy to loads and stores
>> first.
>> + // For cases within the target-specified limits, this is the best
>> choice.
>> + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
>> + if (ConstantSize) {
>> + // Memcpy with size zero? Just return the original chain.
>> + if (ConstantSize->isNullValue())
>> + return Chain;
>> +
>> + SDOperand Result =
>> + getMemcpyLoadsAndStores(*this, Chain, Dst, Src, ConstantSize->getValue(),
>> + Align, false, DstSV, DstOff, SrcSV,
>> SrcOff);
>> + if (Result.Val)
>> + return Result;
>> + }
>> +
>> + // Then check to see if we should lower the memcpy with target-
>> specific
>> + // code. If the target chooses to do this, this is the next best.
>> + SDOperand Result =
>> + TLI.EmitTargetCodeForMemcpy(*this, Chain, Dst, Src, Size, Align,
>> + AlwaysInline,
>> + DstSV, DstOff, SrcSV, SrcOff);
>> + if (Result.Val)
>> + return Result;
>> +
>> + // If we really need inline code and the target declined to
>> provide it,
>> + // use a (potentially long) sequence of loads and stores.
>> + if (AlwaysInline) {
>> + assert(ConstantSize && "AlwaysInline requires a constant
>> size!");
>> + return getMemcpyLoadsAndStores(*this, Chain, Dst, Src,
>> + ConstantSize->getValue(), Align,
>> true,
>> + DstSV, DstOff, SrcSV, SrcOff);
>> + }
>> +
>> + // Emit a library call.
>> + TargetLowering::ArgListTy Args;
>> + TargetLowering::ArgListEntry Entry;
>> + Entry.Ty = TLI.getTargetData()->getIntPtrType();
>> + Entry.Node = Dst; Args.push_back(Entry);
>> + Entry.Node = Src; Args.push_back(Entry);
>> + Entry.Node = Size; Args.push_back(Entry);
>> + std::pair<SDOperand,SDOperand> CallResult =
>> + TLI.LowerCallTo(Chain, Type::VoidTy,
>> + false, false, false, CallingConv::C, false,
>> + getExternalSymbol("memcpy", TLI.getPointerTy()),
>> + Args, *this);
>> + return CallResult.second;
>> +}
>> +
>> +SDOperand SelectionDAG::getMemmove(SDOperand Chain, SDOperand Dst,
>> + SDOperand Src, SDOperand Size,
>> + unsigned Align,
>> + Value *DstSV, uint64_t DstOff,
>> + Value *SrcSV, uint64_t SrcOff) {
>> +
>> + // TODO: Optimize small memmove cases with simple loads and
>> stores,
>> + // ensuring that all loads precede all stores. This can cause
>> severe
>> + // register pressure, so targets should be careful with the size
>> limit.
>> +
>> + // Then check to see if we should lower the memmove with target-
>> specific
>> + // code. If the target chooses to do this, this is the next best.
>> + SDOperand Result =
>> + TLI.EmitTargetCodeForMemmove(*this, Chain, Dst, Src, Size,
>> Align,
>> + DstSV, DstOff, SrcSV, SrcOff);
>> + if (Result.Val)
>> + return Result;
>> +
>> + // Emit a library call.
>> + TargetLowering::ArgListTy Args;
>> + TargetLowering::ArgListEntry Entry;
>> + Entry.Ty = TLI.getTargetData()->getIntPtrType();
>> + Entry.Node = Dst; Args.push_back(Entry);
>> + Entry.Node = Src; Args.push_back(Entry);
>> + Entry.Node = Size; Args.push_back(Entry);
>> + std::pair<SDOperand,SDOperand> CallResult =
>> + TLI.LowerCallTo(Chain, Type::VoidTy,
>> + false, false, false, CallingConv::C, false,
>> + getExternalSymbol("memmove",
>> TLI.getPointerTy()),
>> + Args, *this);
>> + return CallResult.second;
>> }
>>
>> -SDOperand SelectionDAG::getMemset(SDOperand Chain, SDOperand Dest,
>> +SDOperand SelectionDAG::getMemset(SDOperand Chain, SDOperand Dst,
>> SDOperand Src, SDOperand Size,
>> - SDOperand Align,
>> - SDOperand AlwaysInline) {
>> - SDOperand Ops[] = { Chain, Dest, Src, Size, Align, AlwaysInline };
>> - return getNode(ISD::MEMSET, MVT::Other, Ops, 6);
>> + unsigned Align,
>> + Value *DstSV, uint64_t DstOff) {
>> +
>> + // Check to see if we should lower the memset to stores first.
>> + // For cases within the target-specified limits, this is the best
>> choice.
>> + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
>> + if (ConstantSize) {
>> + // Memset with size zero? Just return the original chain.
>> + if (ConstantSize->isNullValue())
>> + return Chain;
>> +
>> + SDOperand Result =
>> + getMemsetStores(*this, Chain, Dst, Src, ConstantSize->getValue(), Align,
>> + DstSV, DstOff);
>> + if (Result.Val)
>> + return Result;
>> + }
>> +
>> + // Then check to see if we should lower the memset with target-
>> specific
>> + // code. If the target chooses to do this, this is the next best.
>> + SDOperand Result =
>> + TLI.EmitTargetCodeForMemset(*this, Chain, Dst, Src, Size, Align,
>> + DstSV, DstOff);
>> + if (Result.Val)
>> + return Result;
>> +
>> + // Emit a library call.
>> + const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType();
>> + TargetLowering::ArgListTy Args;
>> + TargetLowering::ArgListEntry Entry;
>> + Entry.Node = Dst; Entry.Ty = IntPtrTy;
>> + Args.push_back(Entry);
>> + // Extend or truncate the argument to be an i32 value for the
>> call.
>> + if (Src.getValueType() > MVT::i32)
>> + Src = getNode(ISD::TRUNCATE, MVT::i32, Src);
>> + else
>> + Src = getNode(ISD::ZERO_EXTEND, MVT::i32, Src);
>> + Entry.Node = Src; Entry.Ty = Type::Int32Ty; Entry.isSExt = true;
>> + Args.push_back(Entry);
>> + Entry.Node = Size; Entry.Ty = IntPtrTy; Entry.isSExt = false;
>> + Args.push_back(Entry);
>> + std::pair<SDOperand,SDOperand> CallResult =
>> + TLI.LowerCallTo(Chain, Type::VoidTy,
>> + false, false, false, CallingConv::C, false,
>> + getExternalSymbol("memset", TLI.getPointerTy()),
>> + Args, *this);
>> + return CallResult.second;
>> }
>>
>> SDOperand SelectionDAG::getAtomic(unsigned Opcode, SDOperand Chain,
>> @@ -4009,11 +4339,6 @@
>> case ISD::STACKRESTORE: return "stackrestore";
>> case ISD::TRAP: return "trap";
>>
>> - // Block memory operations.
>> - case ISD::MEMSET: return "memset";
>> - case ISD::MEMCPY: return "memcpy";
>> - case ISD::MEMMOVE: return "memmove";
>> -
>> // Bit manipulation
>> case ISD::BSWAP: return "bswap";
>> case ISD::CTPOP: return "ctpop";
>>
>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
>> (original)
>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Fri Apr
>> 11 23:36:06 2008
>> @@ -647,8 +647,6 @@
>> void visitVAEnd(CallInst &I);
>> void visitVACopy(CallInst &I);
>>
>> - void visitMemIntrinsic(CallInst &I, unsigned Op);
>> -
>> void visitGetResult(GetResultInst &I);
>>
>> void visitUserOp1(Instruction &I) {
>> @@ -2737,18 +2735,48 @@
>> return "_longjmp"+!TLI.usesUnderscoreLongJmp();
>> break;
>> case Intrinsic::memcpy_i32:
>> - case Intrinsic::memcpy_i64:
>> - visitMemIntrinsic(I, ISD::MEMCPY);
>> + case Intrinsic::memcpy_i64: {
>> + SDOperand Op1 = getValue(I.getOperand(1));
>> + SDOperand Op2 = getValue(I.getOperand(2));
>> + SDOperand Op3 = getValue(I.getOperand(3));
>> + unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
>> + DAG.setRoot(DAG.getMemcpy(getRoot(), Op1, Op2, Op3, Align,
>> false,
>> + I.getOperand(1), 0, I.getOperand(2),
>> 0));
>> return 0;
>> + }
>> case Intrinsic::memset_i32:
>> - case Intrinsic::memset_i64:
>> - visitMemIntrinsic(I, ISD::MEMSET);
>> + case Intrinsic::memset_i64: {
>> + SDOperand Op1 = getValue(I.getOperand(1));
>> + SDOperand Op2 = getValue(I.getOperand(2));
>> + SDOperand Op3 = getValue(I.getOperand(3));
>> + unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
>> + DAG.setRoot(DAG.getMemset(getRoot(), Op1, Op2, Op3, Align,
>> + I.getOperand(1), 0));
>> return 0;
>> + }
>> case Intrinsic::memmove_i32:
>> - case Intrinsic::memmove_i64:
>> - visitMemIntrinsic(I, ISD::MEMMOVE);
>> + case Intrinsic::memmove_i64: {
>> + SDOperand Op1 = getValue(I.getOperand(1));
>> + SDOperand Op2 = getValue(I.getOperand(2));
>> + SDOperand Op3 = getValue(I.getOperand(3));
>> + unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
>> +
>> + // If the source and destination are known to not be aliases,
>> we can
>> + // lower memmove as memcpy.
>> + uint64_t Size = -1ULL;
>> + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
>> + Size = C->getValue();
>> + if (AA.alias(I.getOperand(1), Size, I.getOperand(2), Size) ==
>> + AliasAnalysis::NoAlias) {
>> + DAG.setRoot(DAG.getMemcpy(getRoot(), Op1, Op2, Op3, Align,
>> false,
>> + I.getOperand(1), 0,
>> I.getOperand(2), 0));
>> + return 0;
>> + }
>> +
>> + DAG.setRoot(DAG.getMemmove(getRoot(), Op1, Op2, Op3, Align,
>> + I.getOperand(1), 0, I.getOperand(2),
>> 0));
>> return 0;
>> -
>> + }
>> case Intrinsic::dbg_stoppoint: {
>> MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
>> DbgStopPointInst &SPI = cast<DbgStopPointInst>(I);
>> @@ -4342,242 +4370,6 @@
>> return SDOperand();
>> }
>>
>> -/// getMemsetValue - Vectorized representation of the memset value
>> -/// operand.
>> -static SDOperand getMemsetValue(SDOperand Value, MVT::ValueType VT,
>> - SelectionDAG &DAG) {
>> - MVT::ValueType CurVT = VT;
>> - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
>> - uint64_t Val = C->getValue() & 255;
>> - unsigned Shift = 8;
>> - while (CurVT != MVT::i8) {
>> - Val = (Val << Shift) | Val;
>> - Shift <<= 1;
>> - CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
>> - }
>> - return DAG.getConstant(Val, VT);
>> - } else {
>> - Value = DAG.getNode(ISD::ZERO_EXTEND, VT, Value);
>> - unsigned Shift = 8;
>> - while (CurVT != MVT::i8) {
>> - Value =
>> - DAG.getNode(ISD::OR, VT,
>> - DAG.getNode(ISD::SHL, VT, Value,
>> - DAG.getConstant(Shift, MVT::i8)),
>> Value);
>> - Shift <<= 1;
>> - CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
>> - }
>> -
>> - return Value;
>> - }
>> -}
>> -
>> -/// getMemsetStringVal - Similar to getMemsetValue. Except this is
>> only
>> -/// used when a memcpy is turned into a memset when the source is a
>> constant
>> -/// string ptr.
>> -static SDOperand getMemsetStringVal(MVT::ValueType VT,
>> - SelectionDAG &DAG,
>> TargetLowering &TLI,
>> - std::string &Str, unsigned
>> Offset) {
>> - uint64_t Val = 0;
>> - unsigned MSB = MVT::getSizeInBits(VT) / 8;
>> - if (TLI.isLittleEndian())
>> - Offset = Offset + MSB - 1;
>> - for (unsigned i = 0; i != MSB; ++i) {
>> - Val = (Val << 8) | (unsigned char)Str[Offset];
>> - Offset += TLI.isLittleEndian() ? -1 : 1;
>> - }
>> - return DAG.getConstant(Val, VT);
>> -}
>> -
>> -/// getMemBasePlusOffset - Returns base and offset node for the
>> -static SDOperand getMemBasePlusOffset(SDOperand Base, unsigned
>> Offset,
>> - SelectionDAG &DAG,
>> TargetLowering &TLI) {
>> - MVT::ValueType VT = Base.getValueType();
>> - return DAG.getNode(ISD::ADD, VT, Base, DAG.getConstant(Offset,
>> VT));
>> -}
>> -
>> -/// MeetsMaxMemopRequirement - Determines if the number of memory
>> ops required
>> -/// to replace the memset / memcpy is below the threshold. It also
>> returns the
>> -/// types of the sequence of memory ops to perform memset / memcpy.
>> -static bool MeetsMaxMemopRequirement(std::vector<MVT::ValueType>
>> &MemOps,
>> - unsigned Limit, uint64_t Size,
>> - unsigned Align, TargetLowering
>> &TLI) {
>> - MVT::ValueType VT;
>> -
>> - if (TLI.allowsUnalignedMemoryAccesses()) {
>> - VT = MVT::i64;
>> - } else {
>> - switch (Align & 7) {
>> - case 0:
>> - VT = MVT::i64;
>> - break;
>> - case 4:
>> - VT = MVT::i32;
>> - break;
>> - case 2:
>> - VT = MVT::i16;
>> - break;
>> - default:
>> - VT = MVT::i8;
>> - break;
>> - }
>> - }
>> -
>> - MVT::ValueType LVT = MVT::i64;
>> - while (!TLI.isTypeLegal(LVT))
>> - LVT = (MVT::ValueType)((unsigned)LVT - 1);
>> - assert(MVT::isInteger(LVT));
>> -
>> - if (VT > LVT)
>> - VT = LVT;
>> -
>> - unsigned NumMemOps = 0;
>> - while (Size != 0) {
>> - unsigned VTSize = MVT::getSizeInBits(VT) / 8;
>> - while (VTSize > Size) {
>> - VT = (MVT::ValueType)((unsigned)VT - 1);
>> - VTSize >>= 1;
>> - }
>> - assert(MVT::isInteger(VT));
>> -
>> - if (++NumMemOps > Limit)
>> - return false;
>> - MemOps.push_back(VT);
>> - Size -= VTSize;
>> - }
>> -
>> - return true;
>> -}
>> -
>> -void SelectionDAGLowering::visitMemIntrinsic(CallInst &I, unsigned
>> Op) {
>> - SDOperand Op1 = getValue(I.getOperand(1));
>> - SDOperand Op2 = getValue(I.getOperand(2));
>> - SDOperand Op3 = getValue(I.getOperand(3));
>> - SDOperand Op4 = getValue(I.getOperand(4));
>> - unsigned Align = (unsigned)cast<ConstantSDNode>(Op4)->getValue();
>> - if (Align == 0) Align = 1;
>> -
>> - // If the source and destination are known to not be aliases, we
>> can
>> - // lower memmove as memcpy.
>> - if (Op == ISD::MEMMOVE) {
>> - uint64_t Size = -1ULL;
>> - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
>> - Size = C->getValue();
>> - if (AA.alias(I.getOperand(1), Size, I.getOperand(2), Size) ==
>> - AliasAnalysis::NoAlias)
>> - Op = ISD::MEMCPY;
>> - }
>> -
>> - if (ConstantSDNode *Size = dyn_cast<ConstantSDNode>(Op3)) {
>> - std::vector<MVT::ValueType> MemOps;
>> -
>> - // Expand memset / memcpy to a series of load / store ops
>> - // if the size operand falls below a certain threshold.
>> - SmallVector<SDOperand, 8> OutChains;
>> - switch (Op) {
>> - default: break; // Do nothing for now.
>> - case ISD::MEMSET: {
>> - if (MeetsMaxMemopRequirement(MemOps,
>> TLI.getMaxStoresPerMemset(),
>> - Size->getValue(), Align, TLI)) {
>> - unsigned NumMemOps = MemOps.size();
>> - unsigned Offset = 0;
>> - for (unsigned i = 0; i < NumMemOps; i++) {
>> - MVT::ValueType VT = MemOps[i];
>> - unsigned VTSize = MVT::getSizeInBits(VT) / 8;
>> - SDOperand Value = getMemsetValue(Op2, VT, DAG);
>> - SDOperand Store = DAG.getStore(getRoot(), Value,
>> - getMemBasePlusOffset(Op1,
>> Offset, DAG, TLI),
>> - I.getOperand(1), Offset);
>> - OutChains.push_back(Store);
>> - Offset += VTSize;
>> - }
>> - }
>> - break;
>> - }
>> - case ISD::MEMCPY: {
>> - if (MeetsMaxMemopRequirement(MemOps,
>> TLI.getMaxStoresPerMemcpy(),
>> - Size->getValue(), Align, TLI)) {
>> - unsigned NumMemOps = MemOps.size();
>> - unsigned SrcOff = 0, DstOff = 0, SrcDelta = 0;
>> - GlobalAddressSDNode *G = NULL;
>> - std::string Str;
>> - bool CopyFromStr = false;
>> -
>> - if (Op2.getOpcode() == ISD::GlobalAddress)
>> - G = cast<GlobalAddressSDNode>(Op2);
>> - else if (Op2.getOpcode() == ISD::ADD &&
>> - Op2.getOperand(0).getOpcode() ==
>> ISD::GlobalAddress &&
>> - Op2.getOperand(1).getOpcode() == ISD::Constant) {
>> - G = cast<GlobalAddressSDNode>(Op2.getOperand(0));
>> - SrcDelta = cast<ConstantSDNode>(Op2.getOperand(1))->getValue();
>> - }
>> - if (G) {
>> - GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
>> - if (GV && GV->isConstant()) {
>> - Str = GV->getStringValue(false);
>> - if (!Str.empty()) {
>> - CopyFromStr = true;
>> - SrcOff += SrcDelta;
>> - }
>> - }
>> - }
>> -
>> - for (unsigned i = 0; i < NumMemOps; i++) {
>> - MVT::ValueType VT = MemOps[i];
>> - unsigned VTSize = MVT::getSizeInBits(VT) / 8;
>> - SDOperand Value, Chain, Store;
>> -
>> - if (CopyFromStr) {
>> - Value = getMemsetStringVal(VT, DAG, TLI, Str, SrcOff);
>> - Chain = getRoot();
>> - Store =
>> - DAG.getStore(Chain, Value,
>> - getMemBasePlusOffset(Op1, DstOff, DAG,
>> TLI),
>> - I.getOperand(1), DstOff);
>> - } else {
>> - Value = DAG.getLoad(VT, getRoot(),
>> - getMemBasePlusOffset(Op2, SrcOff,
>> DAG, TLI),
>> - I.getOperand(2), SrcOff, false,
>> Align);
>> - Chain = Value.getValue(1);
>> - Store =
>> - DAG.getStore(Chain, Value,
>> - getMemBasePlusOffset(Op1, DstOff, DAG,
>> TLI),
>> - I.getOperand(1), DstOff, false, Align);
>> - }
>> - OutChains.push_back(Store);
>> - SrcOff += VTSize;
>> - DstOff += VTSize;
>> - }
>> - }
>> - break;
>> - }
>> - }
>> -
>> - if (!OutChains.empty()) {
>> - DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other,
>> - &OutChains[0], OutChains.size()));
>> - return;
>> - }
>> - }
>> -
>> - SDOperand AlwaysInline = DAG.getConstant(0, MVT::i1);
>> - SDOperand Node;
>> - switch(Op) {
>> - default:
>> - assert(0 && "Unknown Op");
>> - case ISD::MEMCPY:
>> - Node = DAG.getMemcpy(getRoot(), Op1, Op2, Op3, Op4,
>> AlwaysInline);
>> - break;
>> - case ISD::MEMMOVE:
>> - Node = DAG.getMemmove(getRoot(), Op1, Op2, Op3, Op4,
>> AlwaysInline);
>> - break;
>> - case ISD::MEMSET:
>> - Node = DAG.getMemset(getRoot(), Op1, Op2, Op3, Op4,
>> AlwaysInline);
>> - break;
>> - }
>> - DAG.setRoot(Node);
>> -}
>> -
>> //===----------------------------------------------------------------------===//
>> // SelectionDAGISel code
>> //===----------------------------------------------------------------------===//
>>
>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Fri Apr
>> 11 23:36:06 2008
>> @@ -17,7 +17,7 @@
>> #include "llvm/Target/TargetData.h"
>> #include "llvm/Target/TargetMachine.h"
>> #include "llvm/Target/TargetRegisterInfo.h"
>> -#include "llvm/CallingConv.h"
>> +#include "llvm/GlobalVariable.h"
>> #include "llvm/DerivedTypes.h"
>> #include "llvm/CodeGen/SelectionDAG.h"
>> #include "llvm/ADT/StringExtras.h"
>> @@ -234,59 +234,6 @@
>>
>> TargetLowering::~TargetLowering() {}
>>
>> -
>> -SDOperand TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG
>> &DAG) {
>> - assert(getSubtarget() && "Subtarget not defined");
>> - SDOperand ChainOp = Op.getOperand(0);
>> - SDOperand DestOp = Op.getOperand(1);
>> - SDOperand SourceOp = Op.getOperand(2);
>> - SDOperand CountOp = Op.getOperand(3);
>> - SDOperand AlignOp = Op.getOperand(4);
>> - SDOperand AlwaysInlineOp = Op.getOperand(5);
>> -
>> - bool AlwaysInline = (bool)cast<ConstantSDNode>(AlwaysInlineOp)->getValue();
>> - unsigned Align = (unsigned)cast<ConstantSDNode>(AlignOp)->getValue();
>> - if (Align == 0) Align = 1;
>> -
>> - // If size is unknown, call memcpy.
>> - ConstantSDNode *I = dyn_cast<ConstantSDNode>(CountOp);
>> - if (!I) {
>> - assert(!AlwaysInline && "Cannot inline copy of unknown size");
>> - return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
>> - }
>> -
>> - // If not DWORD aligned or if size is more than threshold, then
>> call memcpy.
>> - // The libc version is likely to be faster for the following
>> cases. It can
>> - // use the address value and run time information about the CPU.
>> - // With glibc 2.6.1 on a core 2, coping an array of 100M longs
>> was 30% faster
>> - unsigned Size = I->getValue();
>> - if (AlwaysInline ||
>> - (Size <= getSubtarget()->getMaxInlineSizeThreshold() &&
>> - (Align & 3) == 0))
>> - return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size,
>> Align, DAG);
>> - return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
>> -}
>> -
>> -
>> -SDOperand TargetLowering::LowerMEMCPYCall(SDOperand Chain,
>> - SDOperand Dest,
>> - SDOperand Source,
>> - SDOperand Count,
>> - SelectionDAG &DAG) {
>> - MVT::ValueType IntPtr = getPointerTy();
>> - TargetLowering::ArgListTy Args;
>> - TargetLowering::ArgListEntry Entry;
>> - Entry.Ty = getTargetData()->getIntPtrType();
>> - Entry.Node = Dest; Args.push_back(Entry);
>> - Entry.Node = Source; Args.push_back(Entry);
>> - Entry.Node = Count; Args.push_back(Entry);
>> - std::pair<SDOperand,SDOperand> CallResult =
>> - LowerCallTo(Chain, Type::VoidTy, false, false, false,
>> CallingConv::C,
>> - false, DAG.getExternalSymbol("memcpy", IntPtr),
>> Args, DAG);
>> - return CallResult.second;
>> -}
>> -
>> -
>> /// computeRegisterProperties - Once all of the register classes are
>> added,
>> /// this allows us to compute derived properties we expose.
>> void TargetLowering::computeRegisterProperties() {
>>
>> Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Fri Apr 11
>> 23:36:06 2008
>> @@ -197,11 +197,6 @@
>> setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
>> setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
>>
>> - // Expand mem operations genericly.
>> - setOperationAction(ISD::MEMSET , MVT::Other, Expand);
>> - setOperationAction(ISD::MEMCPY , MVT::Other, Custom);
>> - setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
>> -
>> // Use the default implementation.
>> setOperationAction(ISD::VASTART , MVT::Other, Custom);
>> setOperationAction(ISD::VAARG , MVT::Other, Expand);
>> @@ -1246,18 +1241,30 @@
>> return DAG.getNode(ARMISD::CNEG, VT, AbsVal, AbsVal, ARMCC, CCR,
>> Cmp);
>> }
>>
>> -SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain,
>> - SDOperand Dest,
>> - SDOperand Source,
>> - unsigned Size,
>> - unsigned Align,
>> - SelectionDAG &DAG) {
>> +SDOperand
>> +ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG,
>> + SDOperand Chain,
>> + SDOperand Dst, SDOperand
>> Src,
>> + SDOperand Size, unsigned
>> Align,
>> + bool AlwaysInline,
>> + Value *DstSV, uint64_t
>> DstOff,
>> + Value *SrcSV, uint64_t
>> SrcOff){
>> // Do repeated 4-byte loads and stores. To be improved.
>> - assert((Align & 3) == 0 && "Expected 4-byte aligned addresses!");
>> - unsigned BytesLeft = Size & 3;
>> - unsigned NumMemOps = Size >> 2;
>> + // This requires 4-byte alignment.
>> + if ((Align & 3) != 0)
>> + return SDOperand();
>> + // This requires the copy size to be a constant, preferrably
>> + // within a subtarget-specific limit.
>> + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
>> + if (!ConstantSize)
>> + return SDOperand();
>> + uint64_t SizeVal = ConstantSize->getValue();
>> + if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
>> + return SDOperand();
>> +
>> + unsigned BytesLeft = SizeVal & 3;
>> + unsigned NumMemOps = SizeVal >> 2;
>> unsigned EmittedNumMemOps = 0;
>> - unsigned SrcOff = 0, DstOff = 0;
>> MVT::ValueType VT = MVT::i32;
>> unsigned VTSize = 4;
>> unsigned i = 0;
>> @@ -1272,9 +1279,9 @@
>> for (i = 0;
>> i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
>> Loads[i] = DAG.getLoad(VT, Chain,
>> - DAG.getNode(ISD::ADD, MVT::i32, Source,
>> + DAG.getNode(ISD::ADD, MVT::i32, Src,
>> DAG.getConstant(SrcOff,
>> MVT::i32)),
>> - NULL, 0);
>> + SrcSV, SrcOff);
>> TFOps[i] = Loads[i].getValue(1);
>> SrcOff += VTSize;
>> }
>> @@ -1283,9 +1290,9 @@
>> for (i = 0;
>> i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
>> TFOps[i] = DAG.getStore(Chain, Loads[i],
>> - DAG.getNode(ISD::ADD, MVT::i32, Dest,
>> + DAG.getNode(ISD::ADD, MVT::i32, Dst,
>> DAG.getConstant(DstOff,
>> MVT::i32)),
>> - NULL, 0);
>> + DstSV, DstOff);
>> DstOff += VTSize;
>> }
>> Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, &TFOps[0], i);
>> @@ -1309,9 +1316,9 @@
>> }
>>
>> Loads[i] = DAG.getLoad(VT, Chain,
>> - DAG.getNode(ISD::ADD, MVT::i32, Source,
>> + DAG.getNode(ISD::ADD, MVT::i32, Src,
>> DAG.getConstant(SrcOff,
>> MVT::i32)),
>> - NULL, 0);
>> + SrcSV, SrcOff);
>> TFOps[i] = Loads[i].getValue(1);
>> ++i;
>> SrcOff += VTSize;
>> @@ -1331,9 +1338,9 @@
>> }
>>
>> TFOps[i] = DAG.getStore(Chain, Loads[i],
>> - DAG.getNode(ISD::ADD, MVT::i32, Dest,
>> + DAG.getNode(ISD::ADD, MVT::i32, Dst,
>> DAG.getConstant(DstOff,
>> MVT::i32)),
>> - NULL, 0);
>> + DstSV, DstOff);
>> ++i;
>> DstOff += VTSize;
>> BytesLeft -= VTSize;
>> @@ -1409,7 +1416,6 @@
>> case ISD::RETURNADDR: break;
>> case ISD::FRAMEADDR: break;
>> case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op,
>> DAG);
>> - case ISD::MEMCPY: return LowerMEMCPY(Op, DAG);
>> case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op,
>> DAG);
>>
>>
>>
>> Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
>> +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Fri Apr 11 23:36:06
>> 2008
>> @@ -119,8 +119,8 @@
>> getRegClassForInlineAsmConstraint(const std::string &Constraint,
>> MVT::ValueType VT) const;
>>
>> - virtual const TargetSubtarget* getSubtarget() {
>> - return static_cast<const TargetSubtarget*>(Subtarget);
>> + virtual const ARMSubtarget* getSubtarget() {
>> + return Subtarget;
>> }
>>
>> private:
>> @@ -143,11 +143,14 @@
>> SDOperand LowerGLOBAL_OFFSET_TABLE(SDOperand Op, SelectionDAG
>> &DAG);
>> SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG);
>> SDOperand LowerBR_JT(SDOperand Op, SelectionDAG &DAG);
>> - SDOperand LowerMEMCPYInline(SDOperand Chain, SDOperand Dest,
>> - SDOperand Source, unsigned Size,
>> - unsigned Align, SelectionDAG &DAG);
>> -
>>
>> + SDOperand EmitTargetCodeForMemcpy(SelectionDAG &DAG,
>> + SDOperand Chain,
>> + SDOperand Dst, SDOperand Src,
>> + SDOperand Size, unsigned
>> Align,
>> + bool AlwaysInline,
>> + Value *DstSV, uint64_t DstOff,
>> + Value *SrcSV, uint64_t
>> SrcOff);
>> };
>> }
>>
>>
>> Modified: llvm/trunk/lib/Target/ARM/ARMSubtarget.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSubtarget.h?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/ARM/ARMSubtarget.h (original)
>> +++ llvm/trunk/lib/Target/ARM/ARMSubtarget.h Fri Apr 11 23:36:06 2008
>> @@ -62,6 +62,8 @@
>> ///
>> ARMSubtarget(const Module &M, const std::string &FS, bool thumb);
>>
>> + /// getMaxInlineSizeThreshold - Returns the maximum memset /
>> memcpy size
>> + /// that still makes it profitable to inline the call.
>> unsigned getMaxInlineSizeThreshold() const {
>> // FIXME: For now, we don't lower memcpy's to loads / stores for
>> Thumb.
>> // Change this once Thumb ldmia / stmia support is added.
>>
>> Modified: llvm/trunk/lib/Target/Alpha/AlphaISelLowering.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Alpha/AlphaISelLowering.cpp?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/Alpha/AlphaISelLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/Alpha/AlphaISelLowering.cpp Fri Apr 11
>> 23:36:06 2008
>> @@ -87,10 +87,6 @@
>> setOperationAction(ISD::SDIV , MVT::i64, Custom);
>> setOperationAction(ISD::UDIV , MVT::i64, Custom);
>>
>> - setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
>> - setOperationAction(ISD::MEMSET , MVT::Other, Expand);
>> - setOperationAction(ISD::MEMCPY , MVT::Other, Expand);
>> -
>> // We don't support sin/cos/sqrt/pow
>> setOperationAction(ISD::FSIN , MVT::f64, Expand);
>> setOperationAction(ISD::FCOS , MVT::f64, Expand);
>>
>> Modified: llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/CellSPU/SPUISelLowering.cpp Fri Apr 11
>> 23:36:06 2008
>> @@ -175,9 +175,6 @@
>> setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
>>
>> // SPU has no intrinsics for these particular operations:
>> - setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
>> - setOperationAction(ISD::MEMSET, MVT::Other, Expand);
>> - setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
>> setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
>>
>> // PowerPC has no SREM/UREM instructions
>>
>> Modified: llvm/trunk/lib/Target/IA64/IA64ISelLowering.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/IA64/IA64ISelLowering.cpp?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/IA64/IA64ISelLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/IA64/IA64ISelLowering.cpp Fri Apr 11
>> 23:36:06 2008
>> @@ -65,9 +65,6 @@
>> setOperationAction(ISD::UREM , MVT::f32 , Expand);
>> setOperationAction(ISD::UREM , MVT::f64 , Expand);
>>
>> - setOperationAction(ISD::MEMMOVE , MVT::Other,
>> Expand);
>> - setOperationAction(ISD::MEMSET , MVT::Other,
>> Expand);
>> - setOperationAction(ISD::MEMCPY , MVT::Other,
>> Expand);
>> setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand);
>>
>> setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
>>
>> Modified: llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp Fri Apr 11
>> 23:36:06 2008
>> @@ -80,9 +80,6 @@
>> setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
>>
>> // Mips not supported intrinsics.
>> - setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
>> - setOperationAction(ISD::MEMSET, MVT::Other, Expand);
>> - setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
>> setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
>>
>> setOperationAction(ISD::CTPOP, MVT::i32, Expand);
>>
>> Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Fri Apr 11
>> 23:36:06 2008
>> @@ -78,9 +78,6 @@
>> setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
>>
>> // PowerPC has no intrinsics for these particular operations
>> - setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
>> - setOperationAction(ISD::MEMSET, MVT::Other, Expand);
>> - setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
>> setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
>>
>> // PowerPC has no SREM/UREM instructions
>> @@ -1735,10 +1732,9 @@
>> CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand
>> Chain,
>> ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
>> unsigned Size) {
>> - SDOperand AlignNode = DAG.getConstant(Flags.getByValAlign(),
>> MVT::i32);
>> - SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
>> - SDOperand AlwaysInline = DAG.getConstant(0, MVT::i32);
>> - return DAG.getMemcpy(Chain, Dst, Src, SizeNode, AlignNode,
>> AlwaysInline);
>> + SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
>> + return DAG.getMemcpy(Chain, Dst, Src, SizeNode,
>> Flags.getByValAlign(), false,
>> + NULL, 0, NULL, 0);
>> }
>>
>> SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG
>> &DAG,
>>
>> Modified: llvm/trunk/lib/Target/Sparc/SparcISelLowering.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Sparc/SparcISelLowering.cpp?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/Sparc/SparcISelLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/Sparc/SparcISelLowering.cpp Fri Apr 11
>> 23:36:06 2008
>> @@ -570,9 +570,6 @@
>> setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
>>
>> // SPARC has no intrinsics for these particular operations.
>> - setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
>> - setOperationAction(ISD::MEMSET, MVT::Other, Expand);
>> - setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
>> setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
>>
>> setOperationAction(ISD::FSIN , MVT::f64, Expand);
>>
>> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Apr 11
>> 23:36:06 2008
>> @@ -206,7 +206,6 @@
>> setOperationAction(ISD::BRCOND , MVT::Other, Custom);
>> setOperationAction(ISD::BR_CC , MVT::Other, Expand);
>> setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
>> - setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
>> if (Subtarget->is64Bit())
>> setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
>> setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
>> @@ -281,9 +280,6 @@
>> setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom);
>> setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
>> }
>> - // X86 wants to expand memset / memcpy itself.
>> - setOperationAction(ISD::MEMSET , MVT::Other, Custom);
>> - setOperationAction(ISD::MEMCPY , MVT::Other, Custom);
>>
>> if (Subtarget->hasSSE1())
>> setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
>> @@ -1113,10 +1109,10 @@
>> static SDOperand
>> CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand
>> Chain,
>> ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
>> - SDOperand AlignNode = DAG.getConstant(Flags.getByValAlign(),
>> MVT::i32);
>> SDOperand SizeNode = DAG.getConstant(Flags.getByValSize(),
>> MVT::i32);
>> - SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32);
>> - return DAG.getMemcpy(Chain, Dst, Src, SizeNode, AlignNode,
>> AlwaysInline);
>> + return DAG.getMemcpy(Chain, Dst, Src, SizeNode,
>> Flags.getByValAlign(),
>> + /*AlwaysInline=*/true,
>> + NULL, 0, NULL, 0);
>> }
>>
>> SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op,
>> SelectionDAG &DAG,
>> @@ -4557,52 +4553,51 @@
>> return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
>> }
>>
>> -SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG
>> &DAG) {
>> - SDOperand InFlag(0, 0);
>> - SDOperand Chain = Op.getOperand(0);
>> - unsigned Align =
>> - (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
>> - if (Align == 0) Align = 1;
>> -
>> - ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
>> - // If not DWORD aligned or size is more than the threshold, call
>> memset.
>> - // The libc version is likely to be faster for these cases. It
>> can use the
>> - // address value and run time information about the CPU.
>> - if ((Align & 3) != 0 ||
>> - (I && I->getValue() > Subtarget->getMaxInlineSizeThreshold())) {
>> +SDOperand
>> +X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG,
>> + SDOperand Chain,
>> + SDOperand Dst, SDOperand
>> Src,
>> + SDOperand Size, unsigned
>> Align,
>> + Value *DstSV, uint64_t
>> DstOff) {
>> + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
>> +
>> + /// If not DWORD aligned or size is more than the threshold, call
>> the library.
>> + /// The libc version is likely to be faster for these cases. It
>> can use the
>> + /// address value and run time information about the CPU.
>> + if ((Align & 3) != 0 ||
>> + !ConstantSize ||
>> + ConstantSize->getValue() > getSubtarget()->getMaxInlineSizeThreshold()) {
>> + SDOperand InFlag(0, 0);
>>
>> // Check to see if there is a specialized entry-point for memory
>> zeroing.
>> - ConstantSDNode *V = dyn_cast<ConstantSDNode>(Op.getOperand(2));
>> - const char *bzeroEntry =
>> - V && V->isNullValue() ? Subtarget->getBZeroEntry() : 0;
>> -
>> - MVT::ValueType IntPtr = getPointerTy();
>> - const Type *IntPtrTy = getTargetData()->getIntPtrType();
>> - TargetLowering::ArgListTy Args;
>> - TargetLowering::ArgListEntry Entry;
>> - Entry.Node = Op.getOperand(1);
>> - Entry.Ty = IntPtrTy;
>> - Args.push_back(Entry);
>> -
>> - if (!bzeroEntry) {
>> - // Extend the unsigned i8 argument to be an int value for the
>> call.
>> - Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32,
>> Op.getOperand(2));
>> + ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
>> + if (const char *bzeroEntry =
>> + V && V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
>> + MVT::ValueType IntPtr = getPointerTy();
>> + const Type *IntPtrTy = getTargetData()->getIntPtrType();
>> + TargetLowering::ArgListTy Args;
>> + TargetLowering::ArgListEntry Entry;
>> + Entry.Node = Dst;
>> Entry.Ty = IntPtrTy;
>> Args.push_back(Entry);
>> + Entry.Node = Size;
>> + Args.push_back(Entry);
>> + std::pair<SDOperand,SDOperand> CallResult =
>> + LowerCallTo(Chain, Type::VoidTy, false, false, false,
>> CallingConv::C,
>> + false, DAG.getExternalSymbol(bzeroEntry,
>> IntPtr),
>> + Args, DAG);
>> + return CallResult.second;
>> }
>>
>> - Entry.Node = Op.getOperand(3);
>> - Args.push_back(Entry);
>> - const char *Name = bzeroEntry ? bzeroEntry : "memset";
>> - std::pair<SDOperand,SDOperand> CallResult =
>> - LowerCallTo(Chain, Type::VoidTy, false, false, false,
>> CallingConv::C,
>> - false, DAG.getExternalSymbol(Name, IntPtr), Args,
>> DAG);
>> - return CallResult.second;
>> + // Otherwise have the target-independent code call memset.
>> + return SDOperand();
>> }
>>
>> + uint64_t SizeVal = ConstantSize->getValue();
>> + SDOperand InFlag(0, 0);
>> MVT::ValueType AVT;
>> SDOperand Count;
>> - ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
>> + ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
>> unsigned BytesLeft = 0;
>> bool TwoRepStos = false;
>> if (ValC) {
>> @@ -4630,22 +4625,14 @@
>> default: // Byte aligned
>> AVT = MVT::i8;
>> ValReg = X86::AL;
>> - Count = Op.getOperand(3);
>> + Count = Size;
>> break;
>> }
>>
>> if (AVT > MVT::i8) {
>> - if (I) {
>> - unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
>> - Count = DAG.getIntPtrConstant(I->getValue() / UBytes);
>> - BytesLeft = I->getValue() % UBytes;
>> - } else {
>> - assert(AVT >= MVT::i32 &&
>> - "Do not use rep;stos if not at least DWORD aligned");
>> - Count = DAG.getNode(ISD::SRL,
>> Op.getOperand(3).getValueType(),
>> - Op.getOperand(3), DAG.getConstant(2,
>> MVT::i8));
>> - TwoRepStos = true;
>> - }
>> + unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
>> + Count = DAG.getIntPtrConstant(SizeVal / UBytes);
>> + BytesLeft = SizeVal % UBytes;
>> }
>>
>> Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val,
>> AVT),
>> @@ -4653,8 +4640,8 @@
>> InFlag = Chain.getValue(1);
>> } else {
>> AVT = MVT::i8;
>> - Count = Op.getOperand(3);
>> - Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2),
>> InFlag);
>> + Count = Size;
>> + Chain = DAG.getCopyToReg(Chain, X86::AL, Src, InFlag);
>> InFlag = Chain.getValue(1);
>> }
>>
>> @@ -4662,7 +4649,7 @@
>> Count, InFlag);
>> InFlag = Chain.getValue(1);
>> Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI :
>> X86::EDI,
>> - Op.getOperand(1), InFlag);
>> + Dst, InFlag);
>> InFlag = Chain.getValue(1);
>>
>> SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
>> @@ -4674,7 +4661,7 @@
>>
>> if (TwoRepStos) {
>> InFlag = Chain.getValue(1);
>> - Count = Op.getOperand(3);
>> + Count = Size;
>> MVT::ValueType CVT = Count.getValueType();
>> SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
>> DAG.getConstant((AVT == MVT::i64) ?
>> 7 : 3, CVT));
>> @@ -4688,79 +4675,68 @@
>> Ops.push_back(InFlag);
>> Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
>> } else if (BytesLeft) {
>> - // Issue stores for the last 1 - 7 bytes.
>> - SDOperand Value;
>> - unsigned Val = ValC->getValue() & 255;
>> - unsigned Offset = I->getValue() - BytesLeft;
>> - SDOperand DstAddr = Op.getOperand(1);
>> - MVT::ValueType AddrVT = DstAddr.getValueType();
>> - if (BytesLeft >= 4) {
>> - Val = (Val << 8) | Val;
>> - Val = (Val << 16) | Val;
>> - Value = DAG.getConstant(Val, MVT::i32);
>> - Chain = DAG.getStore(Chain, Value,
>> - DAG.getNode(ISD::ADD, AddrVT, DstAddr,
>> - DAG.getConstant(Offset,
>> AddrVT)),
>> - NULL, 0);
>> - BytesLeft -= 4;
>> - Offset += 4;
>> - }
>> - if (BytesLeft >= 2) {
>> - Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
>> - Chain = DAG.getStore(Chain, Value,
>> - DAG.getNode(ISD::ADD, AddrVT, DstAddr,
>> - DAG.getConstant(Offset,
>> AddrVT)),
>> - NULL, 0);
>> - BytesLeft -= 2;
>> - Offset += 2;
>> - }
>> - if (BytesLeft == 1) {
>> - Value = DAG.getConstant(Val, MVT::i8);
>> - Chain = DAG.getStore(Chain, Value,
>> - DAG.getNode(ISD::ADD, AddrVT, DstAddr,
>> - DAG.getConstant(Offset,
>> AddrVT)),
>> - NULL, 0);
>> - }
>> + // Handle the last 1 - 7 bytes.
>> + unsigned Offset = SizeVal - BytesLeft;
>> + MVT::ValueType AddrVT = Dst.getValueType();
>> + MVT::ValueType SizeVT = Size.getValueType();
>> +
>> + Chain = DAG.getMemset(Chain,
>> + DAG.getNode(ISD::ADD, AddrVT, Dst,
>> + DAG.getConstant(Offset,
>> AddrVT)),
>> + Src,
>> + DAG.getConstant(BytesLeft, SizeVT),
>> + Align, DstSV, Offset);
>> }
>>
>> + // TODO: Use a TokenFactor, as in memcpy, instead of a single
>> chain.
>> return Chain;
>> }
>>
>> -SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain,
>> - SDOperand Dest,
>> - SDOperand Source,
>> - unsigned Size,
>> - unsigned Align,
>> - SelectionDAG &DAG) {
>> +SDOperand
>> +X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG,
>> + SDOperand Chain,
>> + SDOperand Dst, SDOperand
>> Src,
>> + SDOperand Size, unsigned
>> Align,
>> + bool AlwaysInline,
>> + Value *DstSV, uint64_t
>> DstOff,
>> + Value *SrcSV, uint64_t
>> SrcOff){
>> +
>> + // This requires the copy size to be a constant, preferably
>> + // within a subtarget-specific limit.
>> + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
>> + if (!ConstantSize)
>> + return SDOperand();
>> + uint64_t SizeVal = ConstantSize->getValue();
>> + if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
>> + return SDOperand();
>> +
>> + SmallVector<SDOperand, 4> Results;
>> +
>> MVT::ValueType AVT;
>> unsigned BytesLeft = 0;
>> - switch (Align & 3) {
>> - case 2: // WORD aligned
>> - AVT = MVT::i16;
>> - break;
>> - case 0: // DWORD aligned
>> - AVT = MVT::i32;
>> - if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD
>> aligned
>> - AVT = MVT::i64;
>> - break;
>> - default: // Byte aligned
>> - AVT = MVT::i8;
>> - break;
>> - }
>> + if (Align >= 8 && Subtarget->is64Bit())
>> + AVT = MVT::i64;
>> + else if (Align >= 4)
>> + AVT = MVT::i32;
>> + else if (Align >= 2)
>> + AVT = MVT::i16;
>> + else
>> + AVT = MVT::i8;
>>
>> unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
>> - SDOperand Count = DAG.getIntPtrConstant(Size / UBytes);
>> - BytesLeft = Size % UBytes;
>> + unsigned CountVal = SizeVal / UBytes;
>> + SDOperand Count = DAG.getIntPtrConstant(CountVal);
>> + BytesLeft = SizeVal % UBytes;
>>
>> SDOperand InFlag(0, 0);
>> Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX :
>> X86::ECX,
>> Count, InFlag);
>> InFlag = Chain.getValue(1);
>> Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI :
>> X86::EDI,
>> - Dest, InFlag);
>> + Dst, InFlag);
>> InFlag = Chain.getValue(1);
>> Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI :
>> X86::ESI,
>> - Source, InFlag);
>> + Src, InFlag);
>> InFlag = Chain.getValue(1);
>>
>> SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
>> @@ -4768,57 +4744,28 @@
>> Ops.push_back(Chain);
>> Ops.push_back(DAG.getValueType(AVT));
>> Ops.push_back(InFlag);
>> - Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
>> + Results.push_back(DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0],
>> Ops.size()));
>>
>> if (BytesLeft) {
>> - // Issue loads and stores for the last 1 - 7 bytes.
>> - unsigned Offset = Size - BytesLeft;
>> - SDOperand DstAddr = Dest;
>> - MVT::ValueType DstVT = DstAddr.getValueType();
>> - SDOperand SrcAddr = Source;
>> - MVT::ValueType SrcVT = SrcAddr.getValueType();
>> - SDOperand Value;
>> - if (BytesLeft >= 4) {
>> - Value = DAG.getLoad(MVT::i32, Chain,
>> - DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
>> - DAG.getConstant(Offset,
>> SrcVT)),
>> - NULL, 0);
>> - Chain = Value.getValue(1);
>> - Chain = DAG.getStore(Chain, Value,
>> - DAG.getNode(ISD::ADD, DstVT, DstAddr,
>> - DAG.getConstant(Offset,
>> DstVT)),
>> - NULL, 0);
>> - BytesLeft -= 4;
>> - Offset += 4;
>> - }
>> - if (BytesLeft >= 2) {
>> - Value = DAG.getLoad(MVT::i16, Chain,
>> - DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
>> - DAG.getConstant(Offset,
>> SrcVT)),
>> - NULL, 0);
>> - Chain = Value.getValue(1);
>> - Chain = DAG.getStore(Chain, Value,
>> - DAG.getNode(ISD::ADD, DstVT, DstAddr,
>> - DAG.getConstant(Offset,
>> DstVT)),
>> - NULL, 0);
>> - BytesLeft -= 2;
>> - Offset += 2;
>> - }
>> -
>> - if (BytesLeft == 1) {
>> - Value = DAG.getLoad(MVT::i8, Chain,
>> - DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
>> - DAG.getConstant(Offset,
>> SrcVT)),
>> - NULL, 0);
>> - Chain = Value.getValue(1);
>> - Chain = DAG.getStore(Chain, Value,
>> - DAG.getNode(ISD::ADD, DstVT, DstAddr,
>> - DAG.getConstant(Offset,
>> DstVT)),
>> - NULL, 0);
>> - }
>> + // Handle the last 1 - 7 bytes.
>> + unsigned Offset = SizeVal - BytesLeft;
>> + MVT::ValueType DstVT = Dst.getValueType();
>> + MVT::ValueType SrcVT = Src.getValueType();
>> + MVT::ValueType SizeVT = Size.getValueType();
>> +
>> + Results.push_back(DAG.getMemcpy(Chain,
>> + DAG.getNode(ISD::ADD, DstVT,
>> Dst,
>> +
>> DAG.getConstant(Offset,
>> +
>> DstVT)),
>> + DAG.getNode(ISD::ADD, SrcVT,
>> Src,
>> +
>> DAG.getConstant(Offset,
>> +
>> SrcVT)),
>> + DAG.getConstant(BytesLeft,
>> SizeVT),
>> + Align, AlwaysInline,
>> + DstSV, Offset, SrcSV, Offset));
>> }
>>
>> - return Chain;
>> + return DAG.getNode(ISD::TokenFactor, MVT::Other, &Results[0],
>> Results.size());
>> }
>>
>> /// Expand the result of: i64,outchain = READCYCLECOUNTER inchain
>> @@ -5430,8 +5377,6 @@
>> case ISD::CALL: return LowerCALL(Op, DAG);
>> case ISD::RET: return LowerRET(Op, DAG);
>> case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
>> - case ISD::MEMSET: return LowerMEMSET(Op, DAG);
>> - case ISD::MEMCPY: return LowerMEMCPY(Op, DAG);
>> case ISD::VASTART: return LowerVASTART(Op, DAG);
>> case ISD::VACOPY: return LowerVACOPY(Op, DAG);
>> case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op,
>> DAG);
>>
>> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
>> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Fri Apr 11 23:36:06
>> 2008
>> @@ -441,8 +441,8 @@
>> SDOperand Ret,
>> SelectionDAG
>> &DAG) const;
>>
>> - virtual const TargetSubtarget* getSubtarget() {
>> - return static_cast<const TargetSubtarget*>(Subtarget);
>> + virtual const X86Subtarget* getSubtarget() {
>> + return Subtarget;
>> }
>>
>> /// isScalarFPTypeInSSEReg - Return true if the specified scalar
>> FP type is
>> @@ -512,9 +512,6 @@
>> SDOperand LowerSELECT(SDOperand Op, SelectionDAG &DAG);
>> SDOperand LowerBRCOND(SDOperand Op, SelectionDAG &DAG);
>> SDOperand LowerMEMSET(SDOperand Op, SelectionDAG &DAG);
>> - SDOperand LowerMEMCPYInline(SDOperand Dest, SDOperand Source,
>> - SDOperand Chain, unsigned Size,
>> unsigned Align,
>> - SelectionDAG &DAG);
>> SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG);
>> SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG);
>> SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG);
>> @@ -535,6 +532,19 @@
>> SDNode *ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG);
>> SDNode *ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG);
>> SDNode *ExpandATOMIC_LCS(SDNode *N, SelectionDAG &DAG);
>> +
>> + SDOperand EmitTargetCodeForMemset(SelectionDAG &DAG,
>> + SDOperand Chain,
>> + SDOperand Dst, SDOperand Src,
>> + SDOperand Size, unsigned
>> Align,
>> + Value *DstSV, uint64_t
>> DstOff);
>> + SDOperand EmitTargetCodeForMemcpy(SelectionDAG &DAG,
>> + SDOperand Chain,
>> + SDOperand Dst, SDOperand Src,
>> + SDOperand Size, unsigned
>> Align,
>> + bool AlwaysInline,
>> + Value *DstSV, uint64_t DstOff,
>> + Value *SrcSV, uint64_t
>> SrcOff);
>> };
>> }
>>
>>
>> Modified: llvm/trunk/test/CodeGen/X86/2004-02-12-Memcpy.llx
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2004-02-12-Memcpy.llx?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/2004-02-12-Memcpy.llx (original)
>> +++ llvm/trunk/test/CodeGen/X86/2004-02-12-Memcpy.llx Fri Apr 11
>> 23:36:06 2008
>> @@ -1,5 +1,4 @@
>> -; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu |
>> grep movs | count 1
>> -; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu |
>> grep memcpy | count 2
>> +; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu |
>> grep movs | count 3
>>
>> @A = global [32 x i32] zeroinitializer
>> @B = global [32 x i32] zeroinitializer
>>
>> Modified: llvm/trunk/test/CodeGen/X86/byval2.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/byval2.ll?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/byval2.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/byval2.ll Fri Apr 11 23:36:06 2008
>> @@ -1,7 +1,9 @@
>> ; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
>> ; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
>>
>> -%struct.s = type { i64, i64, i64 }
>> +%struct.s = type { i64, i64, i64, i64, i64, i64, i64, i64,
>> + i64, i64, i64, i64, i64, i64, i64, i64,
>> + i64 }
>>
>> define void @g(i64 %a, i64 %b, i64 %c) {
>> entry:
>>
>> Modified: llvm/trunk/test/CodeGen/X86/byval3.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/byval3.ll?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/byval3.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/byval3.ll Fri Apr 11 23:36:06 2008
>> @@ -1,7 +1,11 @@
>> ; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsl | count 2
>> ; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
>>
>> -%struct.s = type { i32, i32, i32, i32, i32, i32 }
>> +%struct.s = type { i32, i32, i32, i32, i32, i32, i32, i32,
>> + i32, i32, i32, i32, i32, i32, i32, i32,
>> + i32, i32, i32, i32, i32, i32, i32, i32,
>> + i32, i32, i32, i32, i32, i32, i32, i32,
>> + i32 }
>>
>> define void @g(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32
>> %a6) {
>> entry:
>>
>> Modified: llvm/trunk/test/CodeGen/X86/byval4.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/byval4.ll?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/byval4.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/byval4.ll Fri Apr 11 23:36:06 2008
>> @@ -1,7 +1,15 @@
>> ; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsw | count 2
>> ; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
>>
>> -%struct.s = type { i16, i16, i16, i16, i16, i16 }
>> +%struct.s = type { i16, i16, i16, i16, i16, i16, i16, i16,
>> + i16, i16, i16, i16, i16, i16, i16, i16,
>> + i16, i16, i16, i16, i16, i16, i16, i16,
>> + i16, i16, i16, i16, i16, i16, i16, i16,
>> + i16, i16, i16, i16, i16, i16, i16, i16,
>> + i16, i16, i16, i16, i16, i16, i16, i16,
>> + i16, i16, i16, i16, i16, i16, i16, i16,
>> + i16, i16, i16, i16, i16, i16, i16, i16,
>> + i16 }
>>
>>
>> define void @g(i16 signext %a1, i16 signext %a2, i16 signext %a3,
>>
>> Modified: llvm/trunk/test/CodeGen/X86/byval5.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/byval5.ll?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/byval5.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/byval5.ll Fri Apr 11 23:36:06 2008
>> @@ -1,7 +1,23 @@
>> ; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsb | count 2
>> ; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
>>
>> -%struct.s = type { i8, i8, i8, i8, i8, i8 }
>> +%struct.s = type { i8, i8, i8, i8, i8, i8, i8, i8,
>> + i8, i8, i8, i8, i8, i8, i8, i8,
>> + i8, i8, i8, i8, i8, i8, i8, i8,
>> + i8, i8, i8, i8, i8, i8, i8, i8,
>> + i8, i8, i8, i8, i8, i8, i8, i8,
>> + i8, i8, i8, i8, i8, i8, i8, i8,
>> + i8, i8, i8, i8, i8, i8, i8, i8,
>> + i8, i8, i8, i8, i8, i8, i8, i8,
>> + i8, i8, i8, i8, i8, i8, i8, i8,
>> + i8, i8, i8, i8, i8, i8, i8, i8,
>> + i8, i8, i8, i8, i8, i8, i8, i8,
>> + i8, i8, i8, i8, i8, i8, i8, i8,
>> + i8, i8, i8, i8, i8, i8, i8, i8,
>> + i8, i8, i8, i8, i8, i8, i8, i8,
>> + i8, i8, i8, i8, i8, i8, i8, i8,
>> + i8, i8, i8, i8, i8, i8, i8, i8,
>> + i8 }
>>
>>
>> define void @g(i8 signext %a1, i8 signext %a2, i8 signext %a3,
>>
>> Modified: llvm/trunk/test/CodeGen/X86/byval7.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/byval7.ll?rev=49572&r1=49571&r2=49572&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/byval7.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/byval7.ll Fri Apr 11 23:36:06 2008
>> @@ -1,6 +1,7 @@
>> ; RUN: llvm-as < %s | llc -march=x86 -mcpu=yonah | grep add | grep 16
>>
>> - %struct.S = type { <2 x i64> }
>> + %struct.S = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>,
>> + <2 x i64> }
>>
>> define i32 @main() nounwind {
>> entry:
>>
>> Added: llvm/trunk/test/CodeGen/X86/small-byval-memcpy.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/small-byval-memcpy.ll?rev=49572&view=auto
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/small-byval-memcpy.ll (added)
>> +++ llvm/trunk/test/CodeGen/X86/small-byval-memcpy.ll Fri Apr 11
>> 23:36:06 2008
>> @@ -0,0 +1,22 @@
>> +; RUN: llvm-as < %s | llc | not grep movs
>> +
>> +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-
>> i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-
>> f80:128:128"
>> +target triple = "i386-apple-darwin8"
>> +
>> +define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret
>> %agg.result, { x86_fp80, x86_fp80 }* byval align 4 %z) nounwind {
>> +entry:
>> + %iz = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*>
>> [#uses=3]
>> + %tmp1 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 1 ;
>> <x86_fp80*> [#uses=1]
>> + %tmp2 = load x86_fp80* %tmp1, align 16 ; <x86_fp80> [#uses=1]
>> + %tmp3 = sub x86_fp80 0xK80000000000000000000, %tmp2 ; <x86_fp80>
>> [#uses=1]
>> + %tmp4 = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 1 ;
>> <x86_fp80*> [#uses=1]
>> + %real = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 0 ;
>> <x86_fp80*> [#uses=1]
>> + %tmp6 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 0 ;
>> <x86_fp80*> [#uses=1]
>> + %tmp7 = load x86_fp80* %tmp6, align 16 ; <x86_fp80> [#uses=1]
>> + store x86_fp80 %tmp3, x86_fp80* %real, align 16
>> + store x86_fp80 %tmp7, x86_fp80* %tmp4, align 16
>> + call void @ccoshl( { x86_fp80, x86_fp80 }* noalias sret
>> %agg.result, { x86_fp80, x86_fp80 }* byval align 4 %iz ) nounwind
>> + ret void
>> +}
>> +
>> +declare void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret ,
>> { x86_fp80, x86_fp80 }* byval align 4 ) nounwind
>>
>> Added: llvm/trunk/test/CodeGen/X86/variable-sized-darwin-bzero.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/variable-sized-darwin-bzero.ll?rev=49572&view=auto
>>
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/variable-sized-darwin-bzero.ll
>> (added)
>> +++ llvm/trunk/test/CodeGen/X86/variable-sized-darwin-bzero.ll Fri
>> Apr 11 23:36:06 2008
>> @@ -0,0 +1,8 @@
>> +; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-apple-darwin10 |
>> grep __bzero
>> +
>> +declare void @llvm.memset.i64(i8*, i8, i64, i32)
>> +
>> +define void @foo(i8* %p, i64 %n) {
>> + call void @llvm.memset.i64(i8* %p, i8 0, i64 %n, i32 4)
>> + ret void
>> +}
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list