[llvm-commits] [llvm] r100090 - in /llvm/trunk: include/llvm/Target/ lib/CodeGen/SelectionDAG/ lib/Target/PowerPC/ lib/Target/X86/ test/CodeGen/X86/
Evan Cheng
evan.cheng at apple.com
Thu Apr 1 13:02:45 PDT 2010
On Apr 1, 2010, at 12:49 PM, Dan Gohman wrote:
> Hi Evan,
>
> A few comments below.
>
> On Mar 31, 2010, at 11:04 PM, Evan Cheng wrote:
>
>> Author: evancheng
>> Date: Thu Apr 1 01:04:33 2010
>> New Revision: 100090
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=100090&view=rev
>> Log:
>> Fix sdisel memcpy, memset, memmove lowering:
>> 1. Makes it possible to lower with floating point loads and stores.
>> 2. Avoid unaligned loads / stores unless it's fast.
>> 3. Fix some memcpy lowering logic bug related to when to optimize a
>> load from constant string into a constant.
>> 4. Adjust x86 memcpy lowering threshold to make it more sane.
>> 5. Fix x86 target hook so it uses vector and floating point memory
>> ops more effectively.
>> rdar://7774704
>>
>> Modified:
>> llvm/trunk/include/llvm/Target/TargetLowering.h
>> llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>> llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
>> llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
>> llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
>> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>> llvm/trunk/lib/Target/X86/X86ISelLowering.h
>> llvm/trunk/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
>> llvm/trunk/test/CodeGen/X86/byval7.ll
>> llvm/trunk/test/CodeGen/X86/memcpy-2.ll
>> llvm/trunk/test/CodeGen/X86/memset-2.ll
>> llvm/trunk/test/CodeGen/X86/memset64-on-x86-32.ll
>> llvm/trunk/test/CodeGen/X86/small-byval-memcpy.ll
>> llvm/trunk/test/CodeGen/X86/unaligned-load.ll
>>
>
>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=100090&r1=100089&r2=100090&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Apr 1 01:04:33 2010
>
>> @@ -5250,17 +5250,6 @@
>> SDValue Value = ST->getValue();
>> SDValue Ptr = ST->getBasePtr();
>>
>> - // Try to infer better alignment information than the store already has.
>> - if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
>> - if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
>> - if (Align > ST->getAlignment())
>> - return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
>> - Ptr, ST->getSrcValue(),
>> - ST->getSrcValueOffset(), ST->getMemoryVT(),
>> - ST->isVolatile(), ST->isNonTemporal(), Align);
>> - }
>> - }
>> -
>> // If this is a store of a bit convert, store the input value if the
>> // resultant store does not need a higher alignment than the original.
>> if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() &&
>> @@ -5351,6 +5340,17 @@
>> }
>> }
>>
>> + // Try to infer better alignment information than the store already has.
>> + if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
>> + if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
>> + if (Align > ST->getAlignment())
>> + return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
>> + Ptr, ST->getSrcValue(),
>> + ST->getSrcValueOffset(), ST->getMemoryVT(),
>> + ST->isVolatile(), ST->isNonTemporal(), Align);
>> + }
>> + }
>> +
>
> Why did this code block move? It seems that the code it now follows
> would also benefit from having more precise alignment information.
This code creates a truncating store, and that ends up inhibiting the other store DAG combines; moving it after them avoids that.
>
> On the other hand, instcombine's alignment refinement code has been
> substantially improved since this code in dagcombine was originally
> written, so dagcombine may no longer need it at all.
That's definitely not what I am seeing though.
>
>> if (CombinerAA) {
>> // Walk up chain skipping non-aliasing memory nodes.
>> SDValue BetterChain = FindBetterChain(N, Chain);
>>
>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=100090&r1=100089&r2=100090&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Thu Apr 1 01:04:33 2010
>> @@ -3132,11 +3132,17 @@
>> if (Str.empty()) {
>> if (VT.isInteger())
>> return DAG.getConstant(0, VT);
>> - unsigned NumElts = VT.getVectorNumElements();
>> - MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
>> - return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
>> - DAG.getConstant(0,
>> - EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts)));
>> + else if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
>> + VT.getSimpleVT().SimpleTy == MVT::f64)
>> + return DAG.getConstantFP(0.0, VT);
>> + else if (VT.isVector()) {
>> + unsigned NumElts = VT.getVectorNumElements();
>> + MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
>> + return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
>> + DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(),
>> + EltVT, NumElts)));
>> + } else
>> + llvm_unreachable("Expected type!");
>> }
>>
>> assert(!VT.isVector() && "Can't handle vector type here!");
>> @@ -3184,51 +3190,33 @@
>> return false;
>> }
>>
>> -/// MeetsMaxMemopRequirement - Determines if the number of memory ops required
>> -/// to replace the memset / memcpy is below the threshold. It also returns the
>> -/// types of the sequence of memory ops to perform memset / memcpy.
>> -static
>> -bool MeetsMaxMemopRequirement(std::vector<EVT> &MemOps,
>> - SDValue Dst, SDValue Src,
>> - unsigned Limit, uint64_t Size, unsigned &Align,
>> - std::string &Str, bool &isSrcStr,
>> - SelectionDAG &DAG,
>> - const TargetLowering &TLI) {
>> - isSrcStr = isMemSrcFromString(Src, Str);
>> - bool isSrcConst = isa<ConstantSDNode>(Src);
>> - EVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr, DAG);
>> - bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses(VT);
>> - if (VT != MVT::Other) {
>> - const Type *Ty = VT.getTypeForEVT(*DAG.getContext());
>> - unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
>> - // If source is a string constant, this will require an unaligned load.
>> - if (NewAlign > Align && (isSrcConst || AllowUnalign)) {
>> - if (Dst.getOpcode() != ISD::FrameIndex) {
>> - // Can't change destination alignment. It requires a unaligned store.
>> - if (AllowUnalign)
>> - VT = MVT::Other;
>> - } else {
>> - int FI = cast<FrameIndexSDNode>(Dst)->getIndex();
>> - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
>> - if (MFI->isFixedObjectIndex(FI)) {
>> - // Can't change destination alignment. It requires a unaligned store.
>> - if (AllowUnalign)
>> - VT = MVT::Other;
>> - } else {
>> - // Give the stack frame object a larger alignment if needed.
>> - if (MFI->getObjectAlignment(FI) < NewAlign)
>> - MFI->setObjectAlignment(FI, NewAlign);
>> - Align = NewAlign;
>> - }
>> - }
>> - }
>> - }
>> +/// FindOptimalMemOpLowering - Determines the optimial series memory ops
>> +/// to replace the memset / memcpy. Return true if the number of memory ops
>> +/// is below the threshold. It returns the types of the sequence of
>> +/// memory ops to perform memset / memcpy by reference.
>> +static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
>> + SDValue Dst, SDValue Src,
>> + unsigned Limit, uint64_t Size,
>> + unsigned DstAlign, unsigned SrcAlign,
>> + SelectionDAG &DAG,
>> + const TargetLowering &TLI) {
>> + assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
>> + "Expecting memcpy / memset source to meet alignment requirement!");
>> + // If 'SrcAlign' is zero, that means the memory operation does not need load
>> + // the value, i.e. memset or memcpy from constant string. Otherwise, it's
>> + // the inferred alignment of the source. 'DstAlign', on the other hand, is the
>> + // specified alignment of the memory operation. If it is zero, that means
>> + // it's possible to change the alignment of the destination.
>> + EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, DAG);
>>
>> if (VT == MVT::Other) {
>> - if (TLI.allowsUnalignedMemoryAccesses(MVT::i64)) {
>> + VT = TLI.getPointerTy();
>> + const Type *Ty = VT.getTypeForEVT(*DAG.getContext());
>> + if (DstAlign >= TLI.getTargetData()->getABITypeAlignment(Ty) ||
>> + TLI.allowsUnalignedMemoryAccesses(VT)) {
>> VT = MVT::i64;
>> } else {
>> - switch (Align & 7) {
>> + switch (DstAlign & 7) {
>> case 0: VT = MVT::i64; break;
>> case 4: VT = MVT::i32; break;
>> case 2: VT = MVT::i16; break;
>> @@ -3250,7 +3238,7 @@
>> unsigned VTSize = VT.getSizeInBits() / 8;
>> while (VTSize > Size) {
>> // For now, only use non-vector load / store's for the left-over pieces.
>> - if (VT.isVector()) {
>> + if (VT.isVector() || VT.isFloatingPoint()) {
>> VT = MVT::i64;
>> while (!TLI.isTypeLegal(VT))
>> VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
>> @@ -3286,15 +3274,33 @@
>> uint64_t Limit = -1ULL;
>> if (!AlwaysInline)
>> Limit = TLI.getMaxStoresPerMemcpy();
>> - unsigned DstAlign = Align; // Destination alignment can change.
>> + bool DstAlignCanChange = false;
>> + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
>> + FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
>> + if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
>> + DstAlignCanChange = true;
>> + unsigned SrcAlign = DAG.InferPtrAlignment(Src);
>> + if (Align > SrcAlign)
>> + SrcAlign = Align;
>> std::string Str;
>> - bool CopyFromStr;
>> - if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
>> - Str, CopyFromStr, DAG, TLI))
>> + bool CopyFromStr = isMemSrcFromString(Src, Str);
>> + bool isZeroStr = CopyFromStr && Str.empty();
>> + if (!FindOptimalMemOpLowering(MemOps, Dst, Src, Limit, Size,
>> + (DstAlignCanChange ? 0 : Align),
>> + (isZeroStr ? 0 : SrcAlign), DAG, TLI))
>> return SDValue();
>>
>> + if (DstAlignCanChange) {
>> + const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
>> + unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
>> + if (NewAlign > Align) {
>> + // Give the stack frame object a larger alignment if needed.
>> + if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
>> + MFI->setObjectAlignment(FI->getIndex(), NewAlign);
>> + Align = NewAlign;
>> + }
>> + }
>>
>> - bool isZeroStr = CopyFromStr && Str.empty();
>> SmallVector<SDValue, 8> OutChains;
>> unsigned NumMemOps = MemOps.size();
>> uint64_t SrcOff = 0, DstOff = 0;
>> @@ -3303,16 +3309,17 @@
>> unsigned VTSize = VT.getSizeInBits() / 8;
>> SDValue Value, Store;
>>
>> - if (CopyFromStr && (isZeroStr || !VT.isVector())) {
>> + if (CopyFromStr &&
>> + (isZeroStr || (VT.isInteger() && !VT.isVector()))) {
>> // It's unlikely a store of a vector immediate can be done in a single
>> // instruction. It would require a load from a constantpool first.
>> - // We also handle store a vector with all zero's.
>> + // We only handle zero vectors here.
>> // FIXME: Handle other cases where store of vector immediate is done in
>> // a single instruction.
>> Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
>> Store = DAG.getStore(Chain, dl, Value,
>> getMemBasePlusOffset(Dst, DstOff, DAG),
>> - DstSV, DstSVOff + DstOff, false, false, DstAlign);
>> + DstSV, DstSVOff + DstOff, false, false, Align);
>> } else {
>> // The type might not be legal for the target. This should only happen
>> // if the type is smaller than a legal type, as on PPC, so the right
>> @@ -3323,11 +3330,12 @@
>> assert(NVT.bitsGE(VT));
>> Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
>> getMemBasePlusOffset(Src, SrcOff, DAG),
>> - SrcSV, SrcSVOff + SrcOff, VT, false, false, Align);
>> + SrcSV, SrcSVOff + SrcOff, VT, false, false,
>> + MinAlign(SrcAlign, SrcOff));
>> Store = DAG.getTruncStore(Chain, dl, Value,
>> getMemBasePlusOffset(Dst, DstOff, DAG),
>> DstSV, DstSVOff + DstOff, VT, false, false,
>> - DstAlign);
>> + Align);
>> }
>> OutChains.push_back(Store);
>> SrcOff += VTSize;
>> @@ -3339,11 +3347,11 @@
>> }
>>
>> static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
>> - SDValue Chain, SDValue Dst,
>> - SDValue Src, uint64_t Size,
>> - unsigned Align, bool AlwaysInline,
>> - const Value *DstSV, uint64_t DstSVOff,
>> - const Value *SrcSV, uint64_t SrcSVOff){
>> + SDValue Chain, SDValue Dst,
>> + SDValue Src, uint64_t Size,
>> + unsigned Align,bool AlwaysInline,
>> + const Value *DstSV, uint64_t DstSVOff,
>> + const Value *SrcSV, uint64_t SrcSVOff) {
>> const TargetLowering &TLI = DAG.getTargetLoweringInfo();
>>
>> // Expand memmove to a series of load and store ops if the size operand falls
>> @@ -3352,15 +3360,32 @@
>> uint64_t Limit = -1ULL;
>> if (!AlwaysInline)
>> Limit = TLI.getMaxStoresPerMemmove();
>> - unsigned DstAlign = Align; // Destination alignment can change.
>> - std::string Str;
>> - bool CopyFromStr;
>> - if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
>> - Str, CopyFromStr, DAG, TLI))
>> + bool DstAlignCanChange = false;
>> + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
>> + FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
>> + if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
>> + DstAlignCanChange = true;
>> + unsigned SrcAlign = DAG.InferPtrAlignment(Src);
>> + if (Align > SrcAlign)
>> + SrcAlign = Align;
>> +
>> + if (!FindOptimalMemOpLowering(MemOps, Dst, Src, Limit, Size,
>> + (DstAlignCanChange ? 0 : Align),
>> + SrcAlign, DAG, TLI))
>> return SDValue();
>>
>> - uint64_t SrcOff = 0, DstOff = 0;
>> + if (DstAlignCanChange) {
>> + const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
>> + unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
>> + if (NewAlign > Align) {
>> + // Give the stack frame object a larger alignment if needed.
>> + if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
>> + MFI->setObjectAlignment(FI->getIndex(), NewAlign);
>> + Align = NewAlign;
>> + }
>> + }
>>
>> + uint64_t SrcOff = 0, DstOff = 0;
>> SmallVector<SDValue, 8> LoadValues;
>> SmallVector<SDValue, 8> LoadChains;
>> SmallVector<SDValue, 8> OutChains;
>> @@ -3372,7 +3397,7 @@
>>
>> Value = DAG.getLoad(VT, dl, Chain,
>> getMemBasePlusOffset(Src, SrcOff, DAG),
>> - SrcSV, SrcSVOff + SrcOff, false, false, Align);
>> + SrcSV, SrcSVOff + SrcOff, false, false, SrcAlign);
>> LoadValues.push_back(Value);
>> LoadChains.push_back(Value.getValue(1));
>> SrcOff += VTSize;
>> @@ -3387,7 +3412,7 @@
>>
>> Store = DAG.getStore(Chain, dl, LoadValues[i],
>> getMemBasePlusOffset(Dst, DstOff, DAG),
>> - DstSV, DstSVOff + DstOff, false, false, DstAlign);
>> + DstSV, DstSVOff + DstOff, false, false, Align);
>> OutChains.push_back(Store);
>> DstOff += VTSize;
>> }
>> @@ -3397,24 +3422,38 @@
>> }
>>
>> static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
>> - SDValue Chain, SDValue Dst,
>> - SDValue Src, uint64_t Size,
>> - unsigned Align,
>> - const Value *DstSV, uint64_t DstSVOff) {
>> + SDValue Chain, SDValue Dst,
>> + SDValue Src, uint64_t Size,
>> + unsigned Align,
>> + const Value *DstSV, uint64_t DstSVOff) {
>> const TargetLowering &TLI = DAG.getTargetLoweringInfo();
>>
>> // Expand memset to a series of load/store ops if the size operand
>> // falls below a certain threshold.
>> std::vector<EVT> MemOps;
>> - std::string Str;
>> - bool CopyFromStr;
>> - if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(),
>> - Size, Align, Str, CopyFromStr, DAG, TLI))
>> + bool DstAlignCanChange = false;
>> + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
>> + FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
>> + if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
>> + DstAlignCanChange = true;
>> + if (!FindOptimalMemOpLowering(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(),
>> + Size, (DstAlignCanChange ? 0 : Align), 0,
>> + DAG, TLI))
>> return SDValue();
>>
>> + if (DstAlignCanChange) {
>> + const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
>> + unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
>> + if (NewAlign > Align) {
>> + // Give the stack frame object a larger alignment if needed.
>> + if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
>> + MFI->setObjectAlignment(FI->getIndex(), NewAlign);
>> + Align = NewAlign;
>> + }
>> + }
>> +
>> SmallVector<SDValue, 8> OutChains;
>> uint64_t DstOff = 0;
>> -
>> unsigned NumMemOps = MemOps.size();
>> for (unsigned i = 0; i < NumMemOps; i++) {
>> EVT VT = MemOps[i];
>> @@ -3445,10 +3484,9 @@
>> if (ConstantSize->isNullValue())
>> return Chain;
>>
>> - SDValue Result =
>> - getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
>> - ConstantSize->getZExtValue(),
>> - Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
>> + SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
>> + ConstantSize->getZExtValue(),Align,
>> + false, DstSV, DstSVOff, SrcSV, SrcSVOff);
>> if (Result.getNode())
>> return Result;
>> }
>> @@ -6106,8 +6144,18 @@
>> // If this is a GlobalAddress + cst, return the alignment.
>> GlobalValue *GV;
>> int64_t GVOffset = 0;
>> - if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset))
>> - return MinAlign(GV->getAlignment(), GVOffset);
>> + if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
>> + // If GV has specified alignment, then use it. Otherwise, use the preferred
>> + // alignment.
>> + unsigned Align = GV->getAlignment();
>> + if (!Align) {
>> + if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
>> + const TargetData *TD = TLI.getTargetData();
>> + Align = TD->getPreferredAlignment(GVar);
>> + }
>> + }
>
> I don't think it's safe to check for the preferred alignment here. If the
> variable is defined in a different translation unit, it may not be aligned
> more than the ABI alignment or the GV->getAlignment() value.
Ok, I'll skip declarations.
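(To illustrate the alignment inference being discussed, here is a simplified model, not LLVM's actual code: `MinAlignModel` mimics LLVM's `MinAlign` for a global plus a byte offset, and `inferGlobalAlign` is a hypothetical helper sketching the fallback Evan agrees to, using the preferred alignment only when the global is defined in this module.)

```cpp
#include <cassert>
#include <cstdint>

// Simplified model of LLVM's MinAlign: the largest alignment guaranteed for
// a pointer of alignment A plus byte offset B. It is the lowest set bit of
// (A | B), since the sum is only known to be a multiple of that power of two.
unsigned MinAlignModel(unsigned A, unsigned B) {
  unsigned Bits = A | B;
  return Bits & (~Bits + 1); // isolate the lowest set bit
}

// Hypothetical sketch (names are made up): use the global's explicit
// alignment if it has one; otherwise fall back to the target's preferred
// alignment, but only when the definition is in this module, per Dan's
// objection that an externally defined global may be less aligned.
unsigned inferGlobalAlign(unsigned ExplicitAlign, unsigned PreferredAlign,
                          bool IsDefinition, int64_t Offset) {
  unsigned Align = ExplicitAlign;
  if (!Align && IsDefinition)
    Align = PreferredAlign;
  return MinAlignModel(Align, (unsigned)Offset);
}
```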
>
> The optimizer should already be upgrading global variables to their
> preferred alignment when safe, so codegen shouldn't have to replicate this.
Again, that doesn't match what I am seeing.
>
>> + return MinAlign(Align, GVOffset);
>> + }
>>
>> // If this is a direct reference to a stack slot, use information about the
>> // stack slot's alignment.
>>
>
>> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=100090&r1=100089&r2=100090&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
>> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Apr 1 01:04:33 2010
>> @@ -1012,7 +1012,7 @@
>> // FIXME: These should be based on subtarget info. Plus, the values should
>> // be smaller when we are in optimizing for size mode.
>> maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
>> - maxStoresPerMemcpy = 16; // For @llvm.memcpy -> sequence of stores
>> + maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
>> maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
>> setPrefLoopAlignment(16);
>> benefitFromCodePlacementOpt = true;
>> @@ -1074,19 +1074,27 @@
>> /// lowering. It returns MVT::iAny if SelectionDAG should be responsible for
>> /// determining it.
>> EVT
>> -X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
>> - bool isSrcConst, bool isSrcStr,
>> +X86TargetLowering::getOptimalMemOpType(uint64_t Size,
>> + unsigned DstAlign, unsigned SrcAlign,
>> SelectionDAG &DAG) const {
>> // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
>> // linux. This is because the stack realignment code can't handle certain
>> // cases like PR2962. This should be removed when PR2962 is fixed.
>> const Function *F = DAG.getMachineFunction().getFunction();
>> - bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
>> - if (!NoImplicitFloatOps && Subtarget->getStackAlignment() >= 16) {
>> - if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
>> - return MVT::v4i32;
>> - if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
>> - return MVT::v4f32;
>> + if (!F->hasFnAttr(Attribute::NoImplicitFloat)) {
>> + if (Size >= 16 &&
>> + (Subtarget->isUnalignedMemAccessFast() ||
>> + (DstAlign == 0 || DstAlign >= 16) &&
>> + (SrcAlign == 0 || SrcAlign >= 16)) &&
>
> It's not obvious what this code intends. What does a 0 align value
> mean in this context?
An alignment of 0 means "don't care". I added comments on the client side; I'll add them here as well.
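(A toy model of that convention, simplified from the patch and not the actual LLVM hook: a DstAlign of 0 means the destination's alignment can still be raised, and a SrcAlign of 0 means no load is needed, e.g. memset or copy from a constant string, so neither constrains the chosen type. Stack-alignment and SSE1 checks from the real code are omitted here.)

```cpp
#include <cstdint>
#include <string>

// Toy model of picking a memory-op type under the "0 means don't care"
// alignment convention discussed above.
std::string pickMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                          bool UnalignedFast, bool HasSSE2) {
  bool DstOK = (DstAlign == 0 || DstAlign >= 16); // 0: free to realign dst
  bool SrcOK = (SrcAlign == 0 || SrcAlign >= 16); // 0: no load required
  if (Size >= 16 && (UnalignedFast || (DstOK && SrcOK)) && HasSSE2)
    return "v4i32"; // 16-byte vector loads/stores
  if (Size >= 8 && HasSSE2)
    return "f64";   // 8-byte FP load/store fallback
  return "Other";
}
```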
>
>> + Subtarget->getStackAlignment() >= 16) {
>> + if (Subtarget->hasSSE2())
>> + return MVT::v4i32;
>> + if (Subtarget->hasSSE1())
>> + return MVT::v4f32;
>> + } else if (Size >= 8 &&
>> + Subtarget->getStackAlignment() >= 8 &&
>> + Subtarget->hasSSE2())
>> + return MVT::f64;
>> }
>
> Why is f64 chosen here? I guess it's because all targets where
> i64 is legal have SSE2, so they wouldn't get here. That's a bit
> subtle though; could you add a comment about this?
OK, I suppose it should choose i64 when that's legal; f64 can still be used when i64 is not legal, though.
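(A sketch of the follow-up Evan agrees to, as a hypothetical helper rather than the real hook: prefer i64 when it is legal, i.e. on 64-bit subtargets, and use f64 only as the 8-byte fallback on 32-bit x86 with SSE2.)

```cpp
#include <string>

// Hypothetical helper modeling the 8-byte type choice under discussion.
std::string pick8ByteType(bool Is64Bit, bool HasSSE2) {
  if (Is64Bit)
    return "i64"; // a legal integer type; needs no FP/SSE registers
  if (HasSSE2)
    return "f64"; // i64 is not legal on x86-32; SSE2 gives an 8-byte move
  return "i32";   // otherwise fall back to 4-byte ops
}
```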
Evan
>
> Thanks,
>
> Dan
>
>> if (Subtarget->is64Bit() && Size >= 8)
>> return MVT::i64;
>>
>