[llvm-branch-commits] [llvm-branch] r100091 - in /llvm/branches/Apple/Morbo: ./ include/llvm/Target/ lib/CodeGen/SelectionDAG/ lib/Target/PowerPC/ lib/Target/X86/ lib/Transforms/IPO/ test/CodeGen/X86/

Wed Mar 31 23:19:43 PDT 2010

Author: evancheng
Date: Thu Apr  1 01:19:43 2010
New Revision: 100091

URL: http://llvm.org/viewvc/llvm-project?rev=100091&view=rev
Log:
Merge: 100089 and 100090.

Modified:
    llvm/branches/Apple/Morbo/   (props changed)
    llvm/branches/Apple/Morbo/include/llvm/Target/TargetLowering.h
    llvm/branches/Apple/Morbo/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/branches/Apple/Morbo/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/branches/Apple/Morbo/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/branches/Apple/Morbo/lib/Target/PowerPC/PPCISelLowering.h
    llvm/branches/Apple/Morbo/lib/Target/X86/X86.td
    llvm/branches/Apple/Morbo/lib/Target/X86/X86ISelLowering.cpp
    llvm/branches/Apple/Morbo/lib/Target/X86/X86ISelLowering.h
    llvm/branches/Apple/Morbo/lib/Target/X86/X86Subtarget.cpp
    llvm/branches/Apple/Morbo/lib/Target/X86/X86Subtarget.h
    llvm/branches/Apple/Morbo/lib/Transforms/IPO/FunctionAttrs.cpp   (props changed)
    llvm/branches/Apple/Morbo/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
    llvm/branches/Apple/Morbo/test/CodeGen/X86/byval7.ll
    llvm/branches/Apple/Morbo/test/CodeGen/X86/memcpy-2.ll
    llvm/branches/Apple/Morbo/test/CodeGen/X86/memset-2.ll
    llvm/branches/Apple/Morbo/test/CodeGen/X86/memset64-on-x86-32.ll
    llvm/branches/Apple/Morbo/test/CodeGen/X86/small-byval-memcpy.ll
    llvm/branches/Apple/Morbo/test/CodeGen/X86/unaligned-load.ll

Propchange: llvm/branches/Apple/Morbo/
------------------------------------------------------------------------------

--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Apr  1 01:19:43 2010
@@ -1,2 +1,2 @@
 /llvm/branches/Apple/Hermes:96832,96835,96858,96870,96876,96879
-/llvm/trunk:98602,98604,98612,98615-98616,98675,98686,98743-98744,98773,98778,98780,98810,98835,98839,98845,98855,98862,98881,98920,98977,99032-99033,99043,99196,99223,99263,99282-99284,99306,99319-99321,99324,99336,99378,99418,99423,99429,99455,99463,99465,99469,99484,99490,99492-99494,99507,99524,99537,99539-99540,99544,99570,99575,99629-99630,99636,99671,99692,99695,99697,99699,99722,99816,99845-99846,99848,99850,99855,99899,99910,99916,99952-99954,99957,99959,99974-99975,99982,99984-99986,99988,99992-99993,99995,99997-99999,100016,100035,100044
+/llvm/trunk:98602,98604,98612,98615-98616,98675,98686,98743-98744,98773,98778,98780,98810,98835,98839,98845,98855,98862,98881,98920,98977,99032-99033,99043,99196,99223,99263,99282-99284,99306,99319-99321,99324,99336,99378,99418,99423,99429,99455,99463,99465,99469,99484,99490,99492-99494,99507,99524,99537,99539-99540,99544,99570,99575,99598,99620,99629-99630,99636,99671,99692,99695,99697,99699,99722,99816,99836,99845-99846,99848,99850,99855,99879,99899,99910,99916,99952-99954,99957,99959,99974-99975,99982,99984-99986,99988,99992-99993,99995,99997-99999,100016,100035,100044,100089-100090

Modified: llvm/branches/Apple/Morbo/include/llvm/Target/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/include/llvm/Target/TargetLowering.h?rev=100091&r1=100090&r2=100091&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/include/llvm/Target/TargetLowering.h (original)
+++ llvm/branches/Apple/Morbo/include/llvm/Target/TargetLowering.h Thu Apr  1 01:19:43 2010
@@ -545,7 +545,7 @@
   /// counterpart (e.g. structs), otherwise it will assert.
   EVT getValueType(const Type *Ty, bool AllowUnknown = false) const {
     EVT VT = EVT::getEVT(Ty, AllowUnknown);
-    return VT == MVT:: iPTR ? PointerTy : VT;
+    return VT == MVT::iPTR ? PointerTy : VT;
   }
 
   /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
@@ -659,8 +659,8 @@
   /// and store operations as a result of memset, memcpy, and memmove lowering.
   /// It returns EVT::Other if SelectionDAG should be responsible for
   /// determining it.
-  virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align,
-                                  bool isSrcConst, bool isSrcStr,
+  virtual EVT getOptimalMemOpType(uint64_t Size,
+                                  unsigned DstAlign, unsigned SrcAlign,
                                   SelectionDAG &DAG) const {
     return MVT::Other;
   }

Modified: llvm/branches/Apple/Morbo/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=100091&r1=100090&r2=100091&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/branches/Apple/Morbo/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Apr  1 01:19:43 2010
@@ -5022,18 +5022,6 @@
   SDValue Chain = LD->getChain();
   SDValue Ptr   = LD->getBasePtr();
 
-  // Try to infer better alignment information than the load already has.
-  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
-    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
-      if (Align > LD->getAlignment())
-        return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
-                              LD->getValueType(0),
-                              Chain, Ptr, LD->getSrcValue(),
-                              LD->getSrcValueOffset(), LD->getMemoryVT(),
-                              LD->isVolatile(), LD->isNonTemporal(), Align);
-    }
-  }
-
   // If load is not volatile and there are no uses of the loaded value (and
   // the updated indexed value in case of indexed loads), change uses of the
   // chain value into uses of the chain input (i.e. delete the dead load).
@@ -5099,6 +5087,18 @@
     }
   }
 
+  // Try to infer better alignment information than the load already has.
+  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
+    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+      if (Align > LD->getAlignment())
+        return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
+                              LD->getValueType(0),
+                              Chain, Ptr, LD->getSrcValue(),
+                              LD->getSrcValueOffset(), LD->getMemoryVT(),
+                              LD->isVolatile(), LD->isNonTemporal(), Align);
+    }
+  }
+
   if (CombinerAA) {
     // Walk up chain skipping non-aliasing memory nodes.
     SDValue BetterChain = FindBetterChain(N, Chain);
@@ -5250,17 +5250,6 @@
   SDValue Value = ST->getValue();
   SDValue Ptr   = ST->getBasePtr();
 
-  // Try to infer better alignment information than the store already has.
-  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
-    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
-      if (Align > ST->getAlignment())
-        return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
-                                 Ptr, ST->getSrcValue(),
-                                 ST->getSrcValueOffset(), ST->getMemoryVT(),
-                                 ST->isVolatile(), ST->isNonTemporal(), Align);
-    }
-  }
-
   // If this is a store of a bit convert, store the input value if the
   // resultant store does not need a higher alignment than the original.
   if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() &&
@@ -5351,6 +5340,17 @@
     }
   }
 
+  // Try to infer better alignment information than the store already has.
+  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
+    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+      if (Align > ST->getAlignment())
+        return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
+                                 Ptr, ST->getSrcValue(),
+                                 ST->getSrcValueOffset(), ST->getMemoryVT(),
+                                 ST->isVolatile(), ST->isNonTemporal(), Align);
+    }
+  }
+
   if (CombinerAA) {
     // Walk up chain skipping non-aliasing memory nodes.
     SDValue BetterChain = FindBetterChain(N, Chain);

Modified: llvm/branches/Apple/Morbo/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=100091&r1=100090&r2=100091&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/branches/Apple/Morbo/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Thu Apr  1 01:19:43 2010
@@ -3132,11 +3132,17 @@
   if (Str.empty()) {
     if (VT.isInteger())
       return DAG.getConstant(0, VT);
-    unsigned NumElts = VT.getVectorNumElements();
-    MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
-    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
-                       DAG.getConstant(0,
-                       EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts)));
+    else if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
+             VT.getSimpleVT().SimpleTy == MVT::f64)
+      return DAG.getConstantFP(0.0, VT);
+    else if (VT.isVector()) {
+      unsigned NumElts = VT.getVectorNumElements();
+      MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+      return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+                         DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(),
+                                                             EltVT, NumElts)));
+    } else
+      llvm_unreachable("Expected type!");
   }
 
   assert(!VT.isVector() && "Can't handle vector type here!");
@@ -3184,51 +3190,33 @@
   return false;
 }
 
-/// MeetsMaxMemopRequirement - Determines if the number of memory ops required
-/// to replace the memset / memcpy is below the threshold. It also returns the
-/// types of the sequence of memory ops to perform memset / memcpy.
-static
-bool MeetsMaxMemopRequirement(std::vector<EVT> &MemOps,
-                              SDValue Dst, SDValue Src,
-                              unsigned Limit, uint64_t Size, unsigned &Align,
-                              std::string &Str, bool &isSrcStr,
-                              SelectionDAG &DAG,
-                              const TargetLowering &TLI) {
-  isSrcStr = isMemSrcFromString(Src, Str);
-  bool isSrcConst = isa<ConstantSDNode>(Src);
-  EVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr, DAG);
-  bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses(VT);
-  if (VT != MVT::Other) {
-    const Type *Ty = VT.getTypeForEVT(*DAG.getContext());
-    unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
-    // If source is a string constant, this will require an unaligned load.
-    if (NewAlign > Align && (isSrcConst || AllowUnalign)) {
-      if (Dst.getOpcode() != ISD::FrameIndex) {
-        // Can't change destination alignment. It requires a unaligned store.
-        if (AllowUnalign)
-          VT = MVT::Other;
-      } else {
-        int FI = cast<FrameIndexSDNode>(Dst)->getIndex();
-        MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
-        if (MFI->isFixedObjectIndex(FI)) {
-          // Can't change destination alignment. It requires a unaligned store.
-          if (AllowUnalign)
-            VT = MVT::Other;
-        } else {
-          // Give the stack frame object a larger alignment if needed.
-          if (MFI->getObjectAlignment(FI) < NewAlign)
-            MFI->setObjectAlignment(FI, NewAlign);
-          Align = NewAlign;
-        }
-      }
-    }
-  }
+/// FindOptimalMemOpLowering - Determines the optimal series of memory ops
+/// to replace the memset / memcpy. Returns true if the number of memory ops
+/// is below the threshold. It returns the types of the sequence of
+/// memory ops to perform memset / memcpy by reference.
+static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
+                                     SDValue Dst, SDValue Src,
+                                     unsigned Limit, uint64_t Size,
+                                     unsigned DstAlign, unsigned SrcAlign,
+                                     SelectionDAG &DAG,
+                                     const TargetLowering &TLI) {
+  assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
+         "Expecting memcpy / memset source to meet alignment requirement!");
+  // If 'SrcAlign' is zero, that means the memory operation does not need to
+  // load the value, i.e. memset or memcpy from a constant string. Otherwise,
+  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
+  // is the specified alignment of the memory operation. If it is zero, that
+  // means it's possible to change the alignment of the destination.
+  EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, DAG);
 
   if (VT == MVT::Other) {
-    if (TLI.allowsUnalignedMemoryAccesses(MVT::i64)) {
+    VT = TLI.getPointerTy();
+    const Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+    if (DstAlign >= TLI.getTargetData()->getABITypeAlignment(Ty) ||
+        TLI.allowsUnalignedMemoryAccesses(VT)) {
       VT = MVT::i64;
     } else {
-      switch (Align & 7) {
+      switch (DstAlign & 7) {
       case 0:  VT = MVT::i64; break;
       case 4:  VT = MVT::i32; break;
       case 2:  VT = MVT::i16; break;
@@ -3250,7 +3238,7 @@
     unsigned VTSize = VT.getSizeInBits() / 8;
     while (VTSize > Size) {
       // For now, only use non-vector load / store's for the left-over pieces.
-      if (VT.isVector()) {
+      if (VT.isVector() || VT.isFloatingPoint()) {
         VT = MVT::i64;
         while (!TLI.isTypeLegal(VT))
           VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
@@ -3286,15 +3274,33 @@
   uint64_t Limit = -1ULL;
   if (!AlwaysInline)
     Limit = TLI.getMaxStoresPerMemcpy();
-  unsigned DstAlign = Align;  // Destination alignment can change.
+  bool DstAlignCanChange = false;
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+  if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+    DstAlignCanChange = true;
+  unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+  if (Align > SrcAlign)
+    SrcAlign = Align;
   std::string Str;
-  bool CopyFromStr;
-  if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
-                                Str, CopyFromStr, DAG, TLI))
+  bool CopyFromStr = isMemSrcFromString(Src, Str);
+  bool isZeroStr = CopyFromStr && Str.empty();
+  if (!FindOptimalMemOpLowering(MemOps, Dst, Src, Limit, Size,
+                                (DstAlignCanChange ? 0 : Align),
+                                (isZeroStr ? 0 : SrcAlign), DAG, TLI))
     return SDValue();
 
+  if (DstAlignCanChange) {
+    const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+    unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+    if (NewAlign > Align) {
+      // Give the stack frame object a larger alignment if needed.
+      if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+        MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+      Align = NewAlign;
+    }
+  }
 
-  bool isZeroStr = CopyFromStr && Str.empty();
   SmallVector<SDValue, 8> OutChains;
   unsigned NumMemOps = MemOps.size();
   uint64_t SrcOff = 0, DstOff = 0;
@@ -3303,16 +3309,17 @@
     unsigned VTSize = VT.getSizeInBits() / 8;
     SDValue Value, Store;
 
-    if (CopyFromStr && (isZeroStr || !VT.isVector())) {
+    if (CopyFromStr &&
+        (isZeroStr || (VT.isInteger() && !VT.isVector()))) {
       // It's unlikely a store of a vector immediate can be done in a single
       // instruction. It would require a load from a constantpool first.
-      // We also handle store a vector with all zero's.
+      // We only handle zero vectors here.
       // FIXME: Handle other cases where store of vector immediate is done in
       // a single instruction.
       Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
       Store = DAG.getStore(Chain, dl, Value,
                            getMemBasePlusOffset(Dst, DstOff, DAG),
-                           DstSV, DstSVOff + DstOff, false, false, DstAlign);
+                           DstSV, DstSVOff + DstOff, false, false, Align);
     } else {
       // The type might not be legal for the target.  This should only happen
       // if the type is smaller than a legal type, as on PPC, so the right
@@ -3323,11 +3330,12 @@
       assert(NVT.bitsGE(VT));
       Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
                              getMemBasePlusOffset(Src, SrcOff, DAG),
-                             SrcSV, SrcSVOff + SrcOff, VT, false, false, Align);
+                             SrcSV, SrcSVOff + SrcOff, VT, false, false,
+                             MinAlign(SrcAlign, SrcOff));
       Store = DAG.getTruncStore(Chain, dl, Value,
                                 getMemBasePlusOffset(Dst, DstOff, DAG),
                                 DstSV, DstSVOff + DstOff, VT, false, false,
-                                DstAlign);
+                                Align);
     }
     OutChains.push_back(Store);
     SrcOff += VTSize;
@@ -3339,11 +3347,11 @@
 }
 
 static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
-                                          SDValue Chain, SDValue Dst,
-                                          SDValue Src, uint64_t Size,
-                                          unsigned Align, bool AlwaysInline,
-                                          const Value *DstSV, uint64_t DstSVOff,
-                                          const Value *SrcSV, uint64_t SrcSVOff){
+                                        SDValue Chain, SDValue Dst,
+                                        SDValue Src, uint64_t Size,
+                                        unsigned Align,bool AlwaysInline,
+                                        const Value *DstSV, uint64_t DstSVOff,
+                                        const Value *SrcSV, uint64_t SrcSVOff) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 
   // Expand memmove to a series of load and store ops if the size operand falls
@@ -3352,15 +3360,32 @@
   uint64_t Limit = -1ULL;
   if (!AlwaysInline)
     Limit = TLI.getMaxStoresPerMemmove();
-  unsigned DstAlign = Align;  // Destination alignment can change.
-  std::string Str;
-  bool CopyFromStr;
-  if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
-                                Str, CopyFromStr, DAG, TLI))
+  bool DstAlignCanChange = false;
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+  if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+    DstAlignCanChange = true;
+  unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+  if (Align > SrcAlign)
+    SrcAlign = Align;
+
+  if (!FindOptimalMemOpLowering(MemOps, Dst, Src, Limit, Size,
+                                (DstAlignCanChange ? 0 : Align),
+                                SrcAlign, DAG, TLI))
     return SDValue();
 
-  uint64_t SrcOff = 0, DstOff = 0;
+  if (DstAlignCanChange) {
+    const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+    unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+    if (NewAlign > Align) {
+      // Give the stack frame object a larger alignment if needed.
+      if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+        MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+      Align = NewAlign;
+    }
+  }
 
+  uint64_t SrcOff = 0, DstOff = 0;
   SmallVector<SDValue, 8> LoadValues;
   SmallVector<SDValue, 8> LoadChains;
   SmallVector<SDValue, 8> OutChains;
@@ -3372,7 +3397,7 @@
 
     Value = DAG.getLoad(VT, dl, Chain,
                         getMemBasePlusOffset(Src, SrcOff, DAG),
-                        SrcSV, SrcSVOff + SrcOff, false, false, Align);
+                        SrcSV, SrcSVOff + SrcOff, false, false, SrcAlign);
     LoadValues.push_back(Value);
     LoadChains.push_back(Value.getValue(1));
     SrcOff += VTSize;
@@ -3387,7 +3412,7 @@
 
     Store = DAG.getStore(Chain, dl, LoadValues[i],
                          getMemBasePlusOffset(Dst, DstOff, DAG),
-                         DstSV, DstSVOff + DstOff, false, false, DstAlign);
+                         DstSV, DstSVOff + DstOff, false, false, Align);
     OutChains.push_back(Store);
     DstOff += VTSize;
   }
@@ -3397,24 +3422,38 @@
 }
 
 static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
-                                 SDValue Chain, SDValue Dst,
-                                 SDValue Src, uint64_t Size,
-                                 unsigned Align,
-                                 const Value *DstSV, uint64_t DstSVOff) {
+                               SDValue Chain, SDValue Dst,
+                               SDValue Src, uint64_t Size,
+                               unsigned Align,
+                               const Value *DstSV, uint64_t DstSVOff) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 
   // Expand memset to a series of load/store ops if the size operand
   // falls below a certain threshold.
   std::vector<EVT> MemOps;
-  std::string Str;
-  bool CopyFromStr;
-  if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(),
-                                Size, Align, Str, CopyFromStr, DAG, TLI))
+  bool DstAlignCanChange = false;
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+  if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+    DstAlignCanChange = true;
+  if (!FindOptimalMemOpLowering(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(),
+                                Size, (DstAlignCanChange ? 0 : Align), 0,
+                                DAG, TLI))
     return SDValue();
 
+  if (DstAlignCanChange) {
+    const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+    unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+    if (NewAlign > Align) {
+      // Give the stack frame object a larger alignment if needed.
+      if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+        MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+      Align = NewAlign;
+    }
+  }
+
   SmallVector<SDValue, 8> OutChains;
   uint64_t DstOff = 0;
-
   unsigned NumMemOps = MemOps.size();
   for (unsigned i = 0; i < NumMemOps; i++) {
     EVT VT = MemOps[i];
@@ -3445,10 +3484,9 @@
     if (ConstantSize->isNullValue())
       return Chain;
 
-    SDValue Result =
-      getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
-                              ConstantSize->getZExtValue(),
-                              Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+    SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+                                             ConstantSize->getZExtValue(),Align,
+                                       false, DstSV, DstSVOff, SrcSV, SrcSVOff);
     if (Result.getNode())
       return Result;
   }
@@ -6109,8 +6147,18 @@
   // If this is a GlobalAddress + cst, return the alignment.
   GlobalValue *GV;
   int64_t GVOffset = 0;
-  if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset))
-    return MinAlign(GV->getAlignment(), GVOffset);
+  if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
+    // If GV has specified alignment, then use it. Otherwise, use the preferred
+    // alignment.
+    unsigned Align = GV->getAlignment();
+    if (!Align) {
+      if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
+        const TargetData *TD = TLI.getTargetData();
+        Align = TD->getPreferredAlignment(GVar);
+      }
+    }
+    return MinAlign(Align, GVOffset);
+  }
 
   // If this is a direct reference to a stack slot, use information about the
   // stack slot's alignment.

Modified: llvm/branches/Apple/Morbo/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/PowerPC/PPCISelLowering.cpp?rev=100091&r1=100090&r2=100091&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/branches/Apple/Morbo/lib/Target/PowerPC/PPCISelLowering.cpp Thu Apr  1 01:19:43 2010
@@ -5539,8 +5539,8 @@
   return false;
 }
 
-EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
-                                           bool isSrcConst, bool isSrcStr,
+EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
+                                           unsigned DstAlign, unsigned SrcAlign,
                                            SelectionDAG &DAG) const {
   if (this->PPCSubTarget.isPPC64()) {
     return MVT::i64;

Modified: llvm/branches/Apple/Morbo/lib/Target/PowerPC/PPCISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/PowerPC/PPCISelLowering.h?rev=100091&r1=100090&r2=100091&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/PowerPC/PPCISelLowering.h (original)
+++ llvm/branches/Apple/Morbo/lib/Target/PowerPC/PPCISelLowering.h Thu Apr  1 01:19:43 2010
@@ -347,8 +347,8 @@
 
     virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
     
-    virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align,
-                                    bool isSrcConst, bool isSrcStr,
+    virtual EVT getOptimalMemOpType(uint64_t Size,
+                                    unsigned DstAlign, unsigned SrcAlign,
                                     SelectionDAG &DAG) const;
 
     /// getFunctionAlignment - Return the Log2 alignment of this function.

Modified: llvm/branches/Apple/Morbo/lib/Target/X86/X86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/X86.td?rev=100091&r1=100090&r2=100091&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/X86/X86.td (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/X86.td Thu Apr  1 01:19:43 2010
@@ -59,6 +59,9 @@
                                       [FeatureCMOV]>;
 def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
                                        "Bit testing of memory is slow">;
+def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem",
+                                        "IsUAMemFast", "true",
+                                        "Fast unaligned memory access">;
 def FeatureSSE4A   : SubtargetFeature<"sse4a", "HasSSE4A", "true",
                                       "Support SSE 4a instructions">;
 
@@ -98,8 +101,10 @@
 def : Proc<"core2",           [FeatureSSSE3,  Feature64Bit, FeatureSlowBTMem]>;
 def : Proc<"penryn",          [FeatureSSE41,  Feature64Bit, FeatureSlowBTMem]>;
 def : Proc<"atom",            [FeatureSSE3,   Feature64Bit, FeatureSlowBTMem]>;
-def : Proc<"corei7",          [FeatureSSE42,  Feature64Bit, FeatureSlowBTMem]>;
-def : Proc<"nehalem",         [FeatureSSE42,  Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"corei7",          [FeatureSSE42,  Feature64Bit, FeatureSlowBTMem,
+                               FeatureFastUAMem]>;
+def : Proc<"nehalem",         [FeatureSSE42,  Feature64Bit, FeatureSlowBTMem,
+                               FeatureFastUAMem]>;
 // Sandy Bridge does not have FMA
 def : Proc<"sandybridge",     [FeatureSSE42,  FeatureAVX,   Feature64Bit]>;
 

Modified: llvm/branches/Apple/Morbo/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/X86ISelLowering.cpp?rev=100091&r1=100090&r2=100091&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/X86ISelLowering.cpp Thu Apr  1 01:19:43 2010
@@ -1008,7 +1008,7 @@
   // FIXME: These should be based on subtarget info. Plus, the values should
   // be smaller when we are in optimizing for size mode.
   maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
-  maxStoresPerMemcpy = 16; // For @llvm.memcpy -> sequence of stores
+  maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
   maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
   setPrefLoopAlignment(16);
   benefitFromCodePlacementOpt = true;
@@ -1070,19 +1070,27 @@
 /// lowering. It returns MVT::iAny if SelectionDAG should be responsible for
 /// determining it.
 EVT
-X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
-                                       bool isSrcConst, bool isSrcStr,
+X86TargetLowering::getOptimalMemOpType(uint64_t Size,
+                                       unsigned DstAlign, unsigned SrcAlign,
                                        SelectionDAG &DAG) const {
   // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
   // linux.  This is because the stack realignment code can't handle certain
   // cases like PR2962.  This should be removed when PR2962 is fixed.
   const Function *F = DAG.getMachineFunction().getFunction();
-  bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
-  if (!NoImplicitFloatOps && Subtarget->getStackAlignment() >= 16) {
-    if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
-      return MVT::v4i32;
-    if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
-      return MVT::v4f32;
+  if (!F->hasFnAttr(Attribute::NoImplicitFloat)) {
+    if (Size >= 16 &&
+        (Subtarget->isUnalignedMemAccessFast() ||
+         (DstAlign == 0 || DstAlign >= 16) &&
+         (SrcAlign == 0 || SrcAlign >= 16)) &&
+        Subtarget->getStackAlignment() >= 16) {
+      if (Subtarget->hasSSE2())
+        return MVT::v4i32;
+      if (Subtarget->hasSSE1())
+        return MVT::v4f32;
+    } else if (Size >= 8 &&
+               Subtarget->getStackAlignment() >= 8 &&
+               Subtarget->hasSSE2())
+      return MVT::f64;
   }
   if (Subtarget->is64Bit() && Size >= 8)
     return MVT::i64;

Modified: llvm/branches/Apple/Morbo/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/X86ISelLowering.h?rev=100091&r1=100090&r2=100091&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/X86ISelLowering.h Thu Apr  1 01:19:43 2010
@@ -420,8 +420,8 @@
     /// and store operations as a result of memset, memcpy, and memmove
     /// lowering. It returns EVT::iAny if SelectionDAG should be responsible for
     /// determining it.
-    virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align,
-                                    bool isSrcConst, bool isSrcStr,
+    virtual EVT getOptimalMemOpType(uint64_t Size,
+                                    unsigned DstAlign, unsigned SrcAlign,
                                     SelectionDAG &DAG) const;
 
     /// allowsUnalignedMemoryAccesses - Returns true if the target allows

Modified: llvm/branches/Apple/Morbo/lib/Target/X86/X86Subtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/X86Subtarget.cpp?rev=100091&r1=100090&r2=100091&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/X86/X86Subtarget.cpp (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/X86Subtarget.cpp Thu Apr  1 01:19:43 2010
@@ -266,6 +266,9 @@
     unsigned Model  = 0;
     DetectFamilyModel(EAX, Family, Model);
     IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13);
+    // Nehalem: unaligned memory access is fast. FIXME: Nehalem is family 6.
+    if (Family == 15 && Model == 26)
+      IsUAMemFast = true;
 
     GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
     HasX86_64 = (EDX >> 29) & 0x1;
@@ -286,6 +289,7 @@
   , HasFMA3(false)
   , HasFMA4(false)
   , IsBTMemSlow(false)
+  , IsUAMemFast(false)
   , HasVectorUAMem(false)
   , DarwinVers(0)
   , stackAlignment(8)

Modified: llvm/branches/Apple/Morbo/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/lib/Target/X86/X86Subtarget.h?rev=100091&r1=100090&r2=100091&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/branches/Apple/Morbo/lib/Target/X86/X86Subtarget.h Thu Apr  1 01:19:43 2010
@@ -78,6 +78,9 @@
   /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
   bool IsBTMemSlow;
 
+  /// IsUAMemFast - True if unaligned memory access is fast.
+  bool IsUAMemFast;
+
   /// HasVectorUAMem - True if SIMD operations can have unaligned memory
   ///                  operands. This may require setting a feature bit in the
   ///                  processor.
@@ -148,6 +151,7 @@
   bool hasFMA3() const { return HasFMA3; }
   bool hasFMA4() const { return HasFMA4; }
   bool isBTMemSlow() const { return IsBTMemSlow; }
+  bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
   bool hasVectorUAMem() const { return HasVectorUAMem; }
 
   bool isTargetDarwin() const { return TargetType == isDarwin; }

Propchange: llvm/branches/Apple/Morbo/lib/Transforms/IPO/FunctionAttrs.cpp
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Apr  1 01:19:43 2010
@@ -1 +1 @@
-/llvm/trunk/lib/Transforms/IPO/FunctionAttrs.cpp:99196,99492,99507,99524,99539-99540,99636,99699,99816,99836,99845-99846,99848,99850,99855,99899,99910,99916,99952-99954,99957,99959,99974-99975,99982,99984-99986,99988,99992-99993,99995,99997-99999,100016,100035,100044
+/llvm/trunk/lib/Transforms/IPO/FunctionAttrs.cpp:99196,99492,99507,99524,99539-99540,99636,99699,99816,99836,99845-99846,99848,99850,99855,99879,99899,99910,99916,99952-99954,99957,99959,99974-99975,99982,99984-99986,99988,99992-99993,99995,99997-99999,100016,100035,100044,100089-100090

Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll?rev=100091&r1=100090&r2=100091&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll Thu Apr  1 01:19:43 2010
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck %s
 ; rdar://7396984
 
 @str = private constant [28 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxx\00", align 1

Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/byval7.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/byval7.ll?rev=100091&r1=100090&r2=100091&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/byval7.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/byval7.ll Thu Apr  1 01:19:43 2010
@@ -1,10 +1,17 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | egrep {add|lea} | grep 16
+; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
 
 	%struct.S = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>,
+                           <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>,
                            <2 x i64> }
 
 define i32 @main() nounwind  {
 entry:
+; CHECK: main:
+; CHECK: movl $1, (%esp)
+; CHECK: leal 16(%esp), %edi
+; CHECK: movl $36, %ecx
+; CHECK: leal 160(%esp), %esi
+; CHECK: rep;movsl
 	%s = alloca %struct.S		; <%struct.S*> [#uses=2]
 	%tmp15 = getelementptr %struct.S* %s, i32 0, i32 0		; <<2 x i64>*> [#uses=1]
 	store <2 x i64> < i64 8589934595, i64 1 >, <2 x i64>* %tmp15, align 16

Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/memcpy-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/memcpy-2.ll?rev=100091&r1=100090&r2=100091&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/memcpy-2.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/memcpy-2.ll Thu Apr  1 01:19:43 2010
@@ -1,15 +1,105 @@
-; RUN: llc < %s -march=x86 -mattr=-sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 7
-; RUN: llc < %s -march=x86 -mattr=+sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 5
+; RUN: llc < %s -mattr=+sse2      -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=SSE2
+; RUN: llc < %s -mattr=+sse,-sse2 -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=SSE1
+; RUN: llc < %s -mattr=-sse       -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=NOSSE
 
 	%struct.ParmT = type { [25 x i8], i8, i8* }
 @.str12 = internal constant [25 x i8] c"image\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00"		; <[25 x i8]*> [#uses=1]
 
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind 
-
-define void @t(i32 %argc, i8** %argv) nounwind  {
+define void @t1(i32 %argc, i8** %argv) nounwind  {
 entry:
+; SSE2: t1:
+; SSE2: movaps _.str12, %xmm0
+; SSE2: movaps %xmm0
+; SSE2: movb $0
+; SSE2: movl $0
+; SSE2: movl $0
+
+; SSE1: t1:
+; SSE1: movaps _.str12, %xmm0
+; SSE1: movaps %xmm0
+; SSE1: movb $0
+; SSE1: movl $0
+; SSE1: movl $0
+
+; NOSSE: t1:
+; NOSSE: movb $0
+; NOSSE: movl $0
+; NOSSE: movl $0
+; NOSSE: movl $0
+; NOSSE: movl $0
+; NOSSE: movl $101
+; NOSSE: movl $1734438249
 	%parms.i = alloca [13 x %struct.ParmT]		; <[13 x %struct.ParmT]*> [#uses=1]
 	%parms1.i = getelementptr [13 x %struct.ParmT]* %parms.i, i32 0, i32 0, i32 0, i32 0		; <i8*> [#uses=1]
 	call void @llvm.memcpy.i32( i8* %parms1.i, i8* getelementptr ([25 x i8]* @.str12, i32 0, i32 0), i32 25, i32 1 ) nounwind 
 	unreachable
 }
+
+;rdar://7774704
+%struct.s0 = type { [2 x double] }
+
+define void @t2(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp {
+entry:
+; SSE2: t2:
+; SSE2: movaps (%eax), %xmm0
+; SSE2: movaps %xmm0, (%eax)
+
+; SSE1: t2:
+; SSE1: movaps (%eax), %xmm0
+; SSE1: movaps %xmm0, (%eax)
+
+; NOSSE: t2:
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+  %tmp2 = bitcast %struct.s0* %a to i8*           ; <i8*> [#uses=1]
+  %tmp3 = bitcast %struct.s0* %b to i8*           ; <i8*> [#uses=1]
+  tail call void @llvm.memcpy.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 16)
+  ret void
+}
+
+define void @t3(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp {
+entry:
+; SSE2: t3:
+; SSE2: movsd (%eax), %xmm0
+; SSE2: movsd 8(%eax), %xmm1
+; SSE2: movsd %xmm1, 8(%eax)
+; SSE2: movsd %xmm0, (%eax)
+
+; SSE1: t3:
+; SSE1: movl
+; SSE1: movl
+; SSE1: movl
+; SSE1: movl
+; SSE1: movl
+; SSE1: movl
+; SSE1: movl
+; SSE1: movl
+; SSE1: movl
+; SSE1: movl
+
+; NOSSE: t3:
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+  %tmp2 = bitcast %struct.s0* %a to i8*           ; <i8*> [#uses=1]
+  %tmp3 = bitcast %struct.s0* %b to i8*           ; <i8*> [#uses=1]
+  tail call void @llvm.memcpy.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 8)
+  ret void
+}
+
+declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind

Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/memset-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/memset-2.ll?rev=100091&r1=100090&r2=100091&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/memset-2.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/memset-2.ll Thu Apr  1 01:19:43 2010
@@ -1,47 +1,13 @@
-; RUN: llc < %s | not grep rep
-; RUN: llc < %s | grep memset
+; RUN: llc < %s | FileCheck %s
 
 target triple = "i386"
 
 declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
 
-define fastcc i32 @cli_scanzip(i32 %desc) nounwind {
+define fastcc void @t() nounwind {
 entry:
-	br label %bb8.i.i.i.i
-
-bb8.i.i.i.i:		; preds = %bb8.i.i.i.i, %entry
-	icmp eq i32 0, 0		; <i1>:0 [#uses=1]
-	br i1 %0, label %bb61.i.i.i, label %bb8.i.i.i.i
-
-bb32.i.i.i:		; preds = %bb61.i.i.i
-	ptrtoint i8* %tail.0.i.i.i to i32		; <i32>:1 [#uses=1]
-	sub i32 0, %1		; <i32>:2 [#uses=1]
-	icmp sgt i32 %2, 19		; <i1>:3 [#uses=1]
-	br i1 %3, label %bb34.i.i.i, label %bb61.i.i.i
-
-bb34.i.i.i:		; preds = %bb32.i.i.i
-	load i32* null, align 4		; <i32>:4 [#uses=1]
-	icmp eq i32 %4, 101010256		; <i1>:5 [#uses=1]
-	br i1 %5, label %bb8.i11.i.i.i, label %bb61.i.i.i
-
-bb8.i11.i.i.i:		; preds = %bb8.i11.i.i.i, %bb34.i.i.i
-	icmp eq i32 0, 0		; <i1>:6 [#uses=1]
-	br i1 %6, label %cli_dbgmsg.exit49.i, label %bb8.i11.i.i.i
-
-cli_dbgmsg.exit49.i:		; preds = %bb8.i11.i.i.i
-	icmp eq [32768 x i8]* null, null		; <i1>:7 [#uses=1]
-	br i1 %7, label %bb1.i28.i, label %bb8.i.i
-
-bb61.i.i.i:		; preds = %bb61.i.i.i, %bb34.i.i.i, %bb32.i.i.i, %bb8.i.i.i.i
-	%tail.0.i.i.i = getelementptr [1024 x i8]* null, i32 0, i32 0		; <i8*> [#uses=2]
-	load i8* %tail.0.i.i.i, align 1		; <i8>:8 [#uses=1]
-	icmp eq i8 %8, 80		; <i1>:9 [#uses=1]
-	br i1 %9, label %bb32.i.i.i, label %bb61.i.i.i
-
-bb1.i28.i:		; preds = %cli_dbgmsg.exit49.i
-	call void @llvm.memset.i32( i8* null, i8 0, i32 88, i32 1 ) nounwind
-	unreachable
-
-bb8.i.i:		; preds = %bb8.i.i, %cli_dbgmsg.exit49.i
-	br label %bb8.i.i
+; CHECK: t:
+; CHECK: call memset
+  call void @llvm.memset.i32( i8* null, i8 0, i32 188, i32 1 ) nounwind
+  unreachable
 }

Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/memset64-on-x86-32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/memset64-on-x86-32.ll?rev=100091&r1=100090&r2=100091&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/memset64-on-x86-32.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/memset64-on-x86-32.ll Thu Apr  1 01:19:43 2010
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | grep stosl
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep movl | count 20
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movq | count 10
 
 define void @bork() nounwind {

Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/small-byval-memcpy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/small-byval-memcpy.ll?rev=100091&r1=100090&r2=100091&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/small-byval-memcpy.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/small-byval-memcpy.ll Thu Apr  1 01:19:43 2010
@@ -1,4 +1,4 @@
-; RUN: llc < %s | not grep movs
+; RUN: llc < %s | grep movsd | count 8
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"

Modified: llvm/branches/Apple/Morbo/test/CodeGen/X86/unaligned-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Morbo/test/CodeGen/X86/unaligned-load.ll?rev=100091&r1=100090&r2=100091&view=diff
==============================================================================
--- llvm/branches/Apple/Morbo/test/CodeGen/X86/unaligned-load.ll (original)
+++ llvm/branches/Apple/Morbo/test/CodeGen/X86/unaligned-load.ll Thu Apr  1 01:19:43 2010
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=dynamic-no-pic --asm-verbose=0 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=core2  -relocation-model=dynamic-no-pic --asm-verbose=0 | FileCheck -check-prefix=CORE2 %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=corei7 -relocation-model=dynamic-no-pic --asm-verbose=0 | FileCheck -check-prefix=COREI7 %s
 
 @.str1 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, SOME STRING\00", align 8
 @.str3 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, 2'ND STRING\00", align 8
@@ -11,7 +12,11 @@
 bb:
   %String2Loc9 = getelementptr inbounds [31 x i8]* %String2Loc, i64 0, i64 0
   call void @llvm.memcpy.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8]* @.str3, i64 0, i64 0), i64 31, i32 1)
-; CHECK: movups _.str3
+; CORE2: movsd _.str3+16
+; CORE2: movsd _.str3+8
+; CORE2: movsd _.str3
+
+; COREI7: movups _.str3
   br label %bb
 
 return:
@@ -20,8 +25,14 @@
 
 declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
 
-; CHECK: .align  3
-; CHECK-NEXT: _.str1:
-; CHECK-NEXT: .asciz "DHRYSTONE PROGRAM, SOME STRING"
-; CHECK: .align 3
-; CHECK-NEXT: _.str3:
+; CORE2: .align  3
+; CORE2-NEXT: _.str1:
+; CORE2-NEXT: .asciz "DHRYSTONE PROGRAM, SOME STRING"
+; CORE2: .align 3
+; CORE2-NEXT: _.str3:
+
+; COREI7: .align  3
+; COREI7-NEXT: _.str1:
+; COREI7-NEXT: .asciz "DHRYSTONE PROGRAM, SOME STRING"
+; COREI7: .align 3
+; COREI7-NEXT: _.str3: