[llvm-commits] [llvm] r164885 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/X86/MergeConsecutiveStores.ll test/CodeGen/X86/loop-strength-reduce-2.ll test/CodeGen/X86/loop-strength-reduce-3.ll test/CodeGen/X86/loop-strength-reduce.ll

Sat Sep 29 08:56:29 PDT 2012

Okay thanks. 

On Sep 29, 2012, at 8:34 AM, Duncan Sands <baldrick at free.fr> wrote:

> Hi Nadav,
> 
>> Thanks for watching the build.  It looks like the failures are unrelated, and continue until 164891. I will check this again before re-applying.
> 
> the offending commit was either yours or mine, so before reverting I wasn't
> 100% sure it was yours.  However the dragonegg buildbots recovered when I
> reverted your commit, and there were no other commits that could explain the
> recovery.
> 
> Ciao, Duncan.
> 
>> 
>> Thanks,
>> Nadav
>> 
>> On Sep 29, 2012, at 3:28 AM, Duncan Sands <baldrick at free.fr> wrote:
>> 
>>> Hi Nadav, I reverted this in commit 164890 as it seems to be the cause of a slew
>>> of dragonegg self-host failures, and several clang buildbot failures too.
>>> 
>>> Ciao, Duncan.
>>> 
>>> On 29/09/12 08:33, Nadav Rotem wrote:
>>>> Author: nadav
>>>> Date: Sat Sep 29 01:33:25 2012
>>>> New Revision: 164885
>>>> 
>>>> URL: http://llvm.org/viewvc/llvm-project?rev=164885&view=rev
>>>> Log:
>>>> 
>>>> A DAGCombine optimization for merging consecutive stores. This optimization is not profitable in many cases
>>>> because moden processos can store multiple values in parallel, and preparing the consecutive store requires
>>>> some work.  We only handle these cases:
>>>> 
>>>> 1. Consecutive stores where the values and consecutive loads. For example:
>>>>   int a = p->a;
>>>>   int b = p->b;
>>>>   q->a = a;
>>>>   q->b = b;
>>>> 
>>>> 2. Consecutive stores where the values are constants. Foe example:
>>>>   q->a = 4;
>>>>   q->b = 5;
>>>> 
>>>> 
>>>> Added:
>>>>     llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
>>>> Modified:
>>>>     llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>>>>     llvm/trunk/test/CodeGen/X86/loop-strength-reduce-2.ll
>>>>     llvm/trunk/test/CodeGen/X86/loop-strength-reduce-3.ll
>>>>     llvm/trunk/test/CodeGen/X86/loop-strength-reduce.ll
>>>> 
>>>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=164885&r1=164884&r2=164885&view=diff
>>>> ==============================================================================
>>>> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
>>>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Sat Sep 29 01:33:25 2012
>>>> @@ -301,6 +301,10 @@
>>>>      /// looking for a better chain (aliasing node.)
>>>>      SDValue FindBetterChain(SDNode *N, SDValue Chain);
>>>> 
>>>> +    /// Merge consecutive store operations into a wide store.
>>>> +    /// \return True if some memory operations were changed.
>>>> +    bool MergeConsecutiveStores(StoreSDNode *N);
>>>> +
>>>>    public:
>>>>      DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
>>>>        : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
>>>> @@ -7437,6 +7441,248 @@
>>>>    return SDValue();
>>>>  }
>>>> 
>>>> +/// Returns the base pointer and an integer offset from that object.
>>>> +static std::pair<SDValue, int64_t> GetPointerBaseAndOffset(SDValue Ptr) {
>>>> +  if (Ptr->getOpcode() == ISD::ADD && isa<ConstantSDNode>(Ptr->getOperand(1))) {
>>>> +    int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
>>>> +    SDValue Base = Ptr->getOperand(0);
>>>> +    return std::make_pair(Base, Offset);
>>>> +  }
>>>> +
>>>> +  return std::make_pair(Ptr, 0);
>>>> +}
>>>> +
>>>> +struct ConsecutiveMemoryChainSorter {
>>>> +  typedef std::pair<LSBaseSDNode*, int64_t> MemLink;
>>>> +  bool operator()(MemLink LHS, MemLink RHS) {
>>>> +    return LHS.second < RHS.second;
>>>> +  }
>>>> +};
>>>> +
>>>> +bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
>>>> +  EVT MemVT = St->getMemoryVT();
>>>> +  int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
>>>> +
>>>> +  // Don't handle vectors.
>>>> +  if (MemVT.isVector() || !MemVT.isSimple())
>>>> +    return false;
>>>> +
>>>> +  // Perform an early exit check. Do not bother looking at stored values that
>>>> +  // are not constants or loads.
>>>> +  SDValue StoredVal = St->getValue();
>>>> +  if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) &&
>>>> +      !isa<LoadSDNode>(StoredVal))
>>>> +    return false;
>>>> +
>>>> +  // Is this a load-to-store or a const-store.
>>>> +  bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
>>>> +
>>>> +  // Only look at ends of store chains.
>>>> +  SDValue Chain = SDValue(St, 1);
>>>> +  if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
>>>> +    return false;
>>>> +
>>>> +  // This holds the base pointer and the offset in bytes from the base pointer.
>>>> +  std::pair<SDValue, int64_t> BasePtr =
>>>> +      GetPointerBaseAndOffset(St->getBasePtr());
>>>> +
>>>> +  // We must have a base and an offset.
>>>> +  if (!BasePtr.first.getNode())
>>>> +    return false;
>>>> +
>>>> +  // Do not handle stores to undef base pointers.
>>>> +  if (BasePtr.first.getOpcode() == ISD::UNDEF)
>>>> +    return false;
>>>> +
>>>> +  SmallVector<std::pair<StoreSDNode*, int64_t>, 8> StoreNodes;
>>>> +  // Walk up the chain and look for nodes with offsets from the same
>>>> +  // base pointer. Stop when reaching an instruction with a different kind
>>>> +  // or instruction which has a different base pointer.
>>>> +  StoreSDNode *Index = St;
>>>> +  while (Index) {
>>>> +    // If the chain has more than one use, then we can't reorder the mem ops.
>>>> +    if (Index != St && !SDValue(Index, 1)->hasOneUse())
>>>> +      break;
>>>> +
>>>> +    // Find the base pointer and offset for this memory node.
>>>> +    std::pair<SDValue, int64_t> Ptr =
>>>> +      GetPointerBaseAndOffset(Index->getBasePtr());
>>>> +
>>>> +    // Check that the base pointer is the same as the original one.
>>>> +    if (Ptr.first.getNode() != BasePtr.first.getNode())
>>>> +      break;
>>>> +
>>>> +    // Check that the alignment is the same.
>>>> +    if (Index->getAlignment() != St->getAlignment())
>>>> +      break;
>>>> +
>>>> +    // The memory operands must not be volatile.
>>>> +    if (Index->isVolatile() || Index->isIndexed())
>>>> +      break;
>>>> +
>>>> +    // No truncation.
>>>> +    if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index))
>>>> +      if (St->isTruncatingStore())
>>>> +        break;
>>>> +
>>>> +    // The stored memory type must be the same.
>>>> +    if (Index->getMemoryVT() != MemVT)
>>>> +      break;
>>>> +
>>>> +    // We found a potential memory operand to merge.
>>>> +    StoreNodes.push_back(std::make_pair(Index,Ptr.second));
>>>> +
>>>> +   // Move up the chain to the next memory operation.
>>>> +    Index = dyn_cast<StoreSDNode>(Index->getChain().getNode());
>>>> +  }
>>>> +
>>>> +  // Check if there is anything to merge.
>>>> +  if (StoreNodes.size() < 2)
>>>> +    return false;
>>>> +
>>>> +  // Remember which node is the earliest node in the chain.
>>>> +  LSBaseSDNode *EarliestOp = StoreNodes.back().first;
>>>> +
>>>> +  // Sort the memory operands according to their distance from the base pointer.
>>>> +  std::sort(StoreNodes.begin(), StoreNodes.end(),
>>>> +            ConsecutiveMemoryChainSorter());
>>>> +
>>>> +  // Scan the memory operations on the chain and find the first non-consecutive
>>>> +  // store memory address.
>>>> +  unsigned LastConsecutiveStore = 0;
>>>> +  int64_t StartAddress = StoreNodes[0].second;
>>>> +  for (unsigned i=1; i<StoreNodes.size(); ++i) {
>>>> +    int64_t CurrAddress = StoreNodes[i].second;
>>>> +    if (CurrAddress - StartAddress != (ElementSizeBytes * i))
>>>> +      break;
>>>> +    LastConsecutiveStore = i;
>>>> +  }
>>>> +
>>>> +  // Store the constants into memory as one consecutive store.
>>>> +  if (!IsLoadSrc) {
>>>> +    unsigned LastConst = 0;
>>>> +    for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
>>>> +      SDValue StoredVal = StoreNodes[i].first->getValue();
>>>> +      bool IsConst = (isa<ConstantSDNode>(StoredVal) || isa<ConstantFPSDNode>(StoredVal));
>>>> +      if (!IsConst)
>>>> +        break;
>>>> +      LastConst = i;
>>>> +    }
>>>> +    unsigned NumElem = std::min(LastConsecutiveStore + 1, LastConst + 1);
>>>> +    if (NumElem < 2)
>>>> +      return false;
>>>> +
>>>> +    EVT JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
>>>> +    DebugLoc DL = StoreNodes[0].first->getDebugLoc();
>>>> +    SmallVector<SDValue, 8> Ops;
>>>> +
>>>> +    for (unsigned i = 0; i < NumElem ; ++i) {
>>>> +      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].first);
>>>> +      Ops.push_back(St->getValue());
>>>> +    }
>>>> +
>>>> +    SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL,
>>>> +                             JointMemOpVT, &Ops[0], Ops.size());
>>>> +
>>>> +    SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, BV,
>>>> +                                    EarliestOp->getBasePtr(),
>>>> +                                    EarliestOp->getPointerInfo(), false, false,
>>>> +                                    EarliestOp->getAlignment());
>>>> +
>>>> +    for (unsigned i = 0; i < NumElem ; ++i) {
>>>> +      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].first);
>>>> +      CombineTo(St, NewStore);
>>>> +    }
>>>> +    return true;
>>>> +  }
>>>> +
>>>> +  // Look for load nodes wich are used by the stored values.
>>>> +  SmallVector<std::pair<LoadSDNode*, int64_t>, 8> LoadNodes;
>>>> +
>>>> +  // Find acceptible loads. Loads need to have the same chain (token factor),
>>>> +  // must not be zext, volatile, indexed, and they must be consecutive.
>>>> +  SDValue LdBasePtr;
>>>> +  for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
>>>> +    LoadSDNode *Ld = dyn_cast<LoadSDNode>(StoreNodes[i].first->getValue());
>>>> +    if (!Ld) break;
>>>> +
>>>> +    // Loads must only have one use.
>>>> +    if (!Ld->hasNUsesOfValue(1, 0))
>>>> +      break;
>>>> +
>>>> +    // Check that the alignment is the same as the stores.
>>>> +    if (Ld->getAlignment() != St->getAlignment())
>>>> +      break;
>>>> +
>>>> +    // The memory operands must not be volatile.
>>>> +    if (Ld->isVolatile() || Ld->isIndexed())
>>>> +      break;
>>>> +
>>>> +    if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
>>>> +      break;
>>>> +
>>>> +    // The stored memory type must be the same.
>>>> +    if (Ld->getMemoryVT() != MemVT)
>>>> +      break;
>>>> +
>>>> +    std::pair<SDValue, int64_t> LdPtr =
>>>> +    GetPointerBaseAndOffset(Ld->getBasePtr());
>>>> +
>>>> +    // If this is not the first ptr that we check.
>>>> +    if (LdBasePtr.getNode()) {
>>>> +      // The base ptr must be the same,
>>>> +      if (LdPtr.first != LdBasePtr)
>>>> +        break;
>>>> +    } else {
>>>> +      LdBasePtr = LdPtr.first;
>>>> +    }
>>>> +
>>>> +    // We found a potential memory operand to merge.
>>>> +    LoadNodes.push_back(std::make_pair(Ld, LdPtr.second));
>>>> +  }
>>>> +
>>>> +  if (LoadNodes.size() < 2)
>>>> +    return false;
>>>> +
>>>> +  // Scan the memory operations on the chain and find the first non-consecutive
>>>> +  // load memory address.
>>>> +  unsigned LastConsecutiveLoad = 0;
>>>> +  StartAddress = LoadNodes[0].second;
>>>> +  for (unsigned i=1; i<LoadNodes.size(); ++i) {
>>>> +    int64_t CurrAddress = LoadNodes[i].second;
>>>> +    if (CurrAddress - StartAddress != (ElementSizeBytes * i))
>>>> +      break;
>>>> +    LastConsecutiveLoad = i;
>>>> +  }
>>>> +
>>>> +  unsigned NumElem =
>>>> +    std::min(LastConsecutiveStore + 1, LastConsecutiveLoad + 1);
>>>> +
>>>> +  EVT JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
>>>> +  DebugLoc LoadDL = LoadNodes[0].first->getDebugLoc();
>>>> +  DebugLoc StoreDL = StoreNodes[0].first->getDebugLoc();
>>>> +
>>>> +  LoadSDNode *FirstLoad = LoadNodes[0].first;
>>>> +  SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL,
>>>> +                                FirstLoad->getChain(),
>>>> +                                FirstLoad->getBasePtr(),
>>>> +                                FirstLoad->getPointerInfo(),
>>>> +                                false, false, false,
>>>> +                                FirstLoad->getAlignment());
>>>> +
>>>> +  SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad,
>>>> +                                  EarliestOp->getBasePtr(),
>>>> +                                  EarliestOp->getPointerInfo(), false, false,
>>>> +                                  EarliestOp->getAlignment());
>>>> +
>>>> +  for (unsigned i = 0; i < NumElem ; ++i) {
>>>> +    StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].first);
>>>> +    CombineTo(St, NewStore);
>>>> +  }
>>>> +
>>>> +  return true;
>>>> +}
>>>> +
>>>>  SDValue DAGCombiner::visitSTORE(SDNode *N) {
>>>>    StoreSDNode *ST  = cast<StoreSDNode>(N);
>>>>    SDValue Chain = ST->getChain();
>>>> @@ -7638,6 +7884,12 @@
>>>>                               ST->getAlignment());
>>>>    }
>>>> 
>>>> +
>>>> +  // Only perform this optimization before the types are legal, because we
>>>> +  // don't want to generate illegal types in this optimization.
>>>> +  if (!LegalTypes && MergeConsecutiveStores(ST))
>>>> +    return SDValue(N, 0);
>>>> +
>>>>    return ReduceLoadOpStoreWidth(N);
>>>>  }
>>>> 
>>>> 
>>>> Added: llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
>>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll?rev=164885&view=auto
>>>> ==============================================================================
>>>> --- llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll (added)
>>>> +++ llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll Sat Sep 29 01:33:25 2012
>>>> @@ -0,0 +1,150 @@
>>>> +; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s
>>>> +
>>>> +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
>>>> +target triple = "x86_64-apple-macosx10.8.0"
>>>> +
>>>> +%struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }
>>>> +
>>>> + at a = common global [10000 x %struct.A] zeroinitializer, align 8
>>>> +
>>>> +; Move all of the constants using a single vector store.
>>>> +; CHECK: merge_const_store
>>>> +; CHECK: movq %xmm0
>>>> +; CHECK: ret
>>>> +define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
>>>> +  %1 = icmp sgt i32 %count, 0
>>>> +  br i1 %1, label %.lr.ph, label %._crit_edge
>>>> +.lr.ph:
>>>> +  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
>>>> +  %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]
>>>> +  %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
>>>> +  store i8 1, i8* %2, align 1
>>>> +  %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
>>>> +  store i8 2, i8* %3, align 1
>>>> +  %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
>>>> +  store i8 3, i8* %4, align 1
>>>> +  %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
>>>> +  store i8 4, i8* %5, align 1
>>>> +  %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
>>>> +  store i8 5, i8* %6, align 1
>>>> +  %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
>>>> +  store i8 6, i8* %7, align 1
>>>> +  %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
>>>> +  store i8 7, i8* %8, align 1
>>>> +  %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
>>>> +  store i8 8, i8* %9, align 1
>>>> +  %10 = add nsw i32 %i.02, 1
>>>> +  %11 = getelementptr inbounds %struct.A* %.01, i64 1
>>>> +  %exitcond = icmp eq i32 %10, %count
>>>> +  br i1 %exitcond, label %._crit_edge, label %.lr.ph
>>>> +._crit_edge:
>>>> +  ret void
>>>> +}
>>>> +
>>>> +; Move the first 4 constants as a single vector. Move the rest as scalars.
>>>> +; CHECK: merge_nonconst_store
>>>> +; CHECK: movd %xmm0
>>>> +; CHECK: movb
>>>> +; CHECK: movb
>>>> +; CHECK: movb
>>>> +; CHECK: movb
>>>> +; CHECK: ret
>>>> +define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
>>>> +  %1 = icmp sgt i32 %count, 0
>>>> +  br i1 %1, label %.lr.ph, label %._crit_edge
>>>> +.lr.ph:
>>>> +  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
>>>> +  %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]
>>>> +  %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
>>>> +  store i8 1, i8* %2, align 1
>>>> +  %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
>>>> +  store i8 2, i8* %3, align 1
>>>> +  %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
>>>> +  store i8 3, i8* %4, align 1
>>>> +  %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
>>>> +  store i8 4, i8* %5, align 1
>>>> +  %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
>>>> +  store i8 %zz, i8* %6, align 1                     ;  <----------- Not a const;
>>>> +  %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
>>>> +  store i8 6, i8* %7, align 1
>>>> +  %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
>>>> +  store i8 7, i8* %8, align 1
>>>> +  %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
>>>> +  store i8 8, i8* %9, align 1
>>>> +  %10 = add nsw i32 %i.02, 1
>>>> +  %11 = getelementptr inbounds %struct.A* %.01, i64 1
>>>> +  %exitcond = icmp eq i32 %10, %count
>>>> +  br i1 %exitcond, label %._crit_edge, label %.lr.ph
>>>> +._crit_edge:
>>>> +  ret void
>>>> +}
>>>> +
>>>> +
>>>> +;CHECK: merge_loads
>>>> +; load:
>>>> +;CHECK: movw
>>>> +; store:
>>>> +;CHECK: movw
>>>> +;CHECK: ret
>>>> +define void @merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
>>>> +  %1 = icmp sgt i32 %count, 0
>>>> +  br i1 %1, label %.lr.ph, label %._crit_edge
>>>> +
>>>> +.lr.ph:                                           ; preds = %0
>>>> +  %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0
>>>> +  %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
>>>> +  br label %4
>>>> +
>>>> +; <label>:4                                       ; preds = %4, %.lr.ph
>>>> +  %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
>>>> +  %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 ]
>>>> +  %5 = load i8* %2, align 1
>>>> +  %6 = load i8* %3, align 1
>>>> +  %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
>>>> +  store i8 %5, i8* %7, align 1
>>>> +  %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
>>>> +  store i8 %6, i8* %8, align 1
>>>> +  %9 = add nsw i32 %i.02, 1
>>>> +  %10 = getelementptr inbounds %struct.A* %.01, i64 1
>>>> +  %exitcond = icmp eq i32 %9, %count
>>>> +  br i1 %exitcond, label %._crit_edge, label %4
>>>> +
>>>> +._crit_edge:                                      ; preds = %4, %0
>>>> +  ret void
>>>> +}
>>>> +
>>>> +; The loads and the stores are interleved. Can't merge them.
>>>> +;CHECK: no_merge_loads
>>>> +;CHECK: movb
>>>> +;CHECK: movb
>>>> +;CHECK: movb
>>>> +;CHECK: movb
>>>> +;CHECK: ret
>>>> +define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
>>>> +  %1 = icmp sgt i32 %count, 0
>>>> +  br i1 %1, label %.lr.ph, label %._crit_edge
>>>> +
>>>> +.lr.ph:                                           ; preds = %0
>>>> +  %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0
>>>> +  %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
>>>> +  br label %a4
>>>> +
>>>> +a4:                                       ; preds = %4, %.lr.ph
>>>> +  %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ]
>>>> +  %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ]
>>>> +  %a5 = load i8* %2, align 1
>>>> +  %a7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
>>>> +  store i8 %a5, i8* %a7, align 1
>>>> +  %a8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
>>>> +  %a6 = load i8* %3, align 1
>>>> +  store i8 %a6, i8* %a8, align 1
>>>> +  %a9 = add nsw i32 %i.02, 1
>>>> +  %a10 = getelementptr inbounds %struct.A* %.01, i64 1
>>>> +  %exitcond = icmp eq i32 %a9, %count
>>>> +  br i1 %exitcond, label %._crit_edge, label %a4
>>>> +
>>>> +._crit_edge:                                      ; preds = %4, %0
>>>> +  ret void
>>>> +}
>>>> +
>>>> +
>>>> 
>>>> Modified: llvm/trunk/test/CodeGen/X86/loop-strength-reduce-2.ll
>>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/loop-strength-reduce-2.ll?rev=164885&r1=164884&r2=164885&view=diff
>>>> ==============================================================================
>>>> --- llvm/trunk/test/CodeGen/X86/loop-strength-reduce-2.ll (original)
>>>> +++ llvm/trunk/test/CodeGen/X86/loop-strength-reduce-2.ll Sat Sep 29 01:33:25 2012
>>>> @@ -1,20 +1,18 @@
>>>> -; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC
>>>> -; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s -check-prefix=STATIC
>>>> +; RUN: llc < %s -march=x86 -mcpu=corei7 -relocation-model=pic | FileCheck %s -check-prefix=PIC
>>>> +; RUN: llc < %s -march=x86 -mcpu=corei7 -relocation-model=static | FileCheck %s -check-prefix=STATIC
>>>>  ;
>>>>  ; Make sure the common loop invariant A is hoisted up to preheader,
>>>>  ; since too many registers are needed to subsume it into the addressing modes.
>>>>  ; It's safe to sink A in when it's not pic.
>>>> 
>>>>  ; PIC:  align
>>>> -; PIC:  movl  $4, -4([[REG:%e[a-z]+]])
>>>> -; PIC:  movl  $5, ([[REG]])
>>>> +; PIC:  movlpd %xmm0, -4([[REG:%e[a-z]+]])
>>>>  ; PIC:  addl  $4, [[REG]]
>>>>  ; PIC:  decl  {{%e[[a-z]+}}
>>>>  ; PIC:  jne
>>>> 
>>>>  ; STATIC: align
>>>> -; STATIC: movl  $4, -4(%ecx)
>>>> -; STATIC: movl  $5, (%ecx)
>>>> +; STATIC: movlpd %xmm0, -4(%ecx)
>>>>  ; STATIC: addl  $4, %ecx
>>>>  ; STATIC: decl  %eax
>>>>  ; STATIC: jne
>>>> 
>>>> Modified: llvm/trunk/test/CodeGen/X86/loop-strength-reduce-3.ll
>>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/loop-strength-reduce-3.ll?rev=164885&r1=164884&r2=164885&view=diff
>>>> ==============================================================================
>>>> --- llvm/trunk/test/CodeGen/X86/loop-strength-reduce-3.ll (original)
>>>> +++ llvm/trunk/test/CodeGen/X86/loop-strength-reduce-3.ll Sat Sep 29 01:33:25 2012
>>>> @@ -1,8 +1,7 @@
>>>> -; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s
>>>> +; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=corei7 -relocation-model=dynamic-no-pic | FileCheck %s
>>>> 
>>>>  ; CHECK: align
>>>> -; CHECK: movl  $4, -4(%ecx)
>>>> -; CHECK: movl  $5, (%ecx)
>>>> +; CHECK: movlpd %xmm0, -4(%ecx)
>>>>  ; CHECK: addl  $4, %ecx
>>>>  ; CHECK: decl  %eax
>>>>  ; CHECK: jne
>>>> 
>>>> Modified: llvm/trunk/test/CodeGen/X86/loop-strength-reduce.ll
>>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/loop-strength-reduce.ll?rev=164885&r1=164884&r2=164885&view=diff
>>>> ==============================================================================
>>>> --- llvm/trunk/test/CodeGen/X86/loop-strength-reduce.ll (original)
>>>> +++ llvm/trunk/test/CodeGen/X86/loop-strength-reduce.ll Sat Sep 29 01:33:25 2012
>>>> @@ -1,8 +1,7 @@
>>>> -; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s
>>>> +; RUN: llc < %s -march=x86 -mcpu=corei7 -relocation-model=static | FileCheck %s
>>>> 
>>>>  ; CHECK: align
>>>> -; CHECK: movl  $4, -4(%ecx)
>>>> -; CHECK: movl  $5, (%ecx)
>>>> +; CHECK: movlpd %xmm0, -4(%ecx)
>>>>  ; CHECK: addl  $4, %ecx
>>>>  ; CHECK: decl  %eax
>>>>  ; CHECK: jne
>>>> 
>>>> 
>>>> _______________________________________________
>>>> llvm-commits mailing list
>>>> llvm-commits at cs.uiuc.edu
>>>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>>>> 
>>> 
>>> _______________________________________________
>>> llvm-commits mailing list
>>> llvm-commits at cs.uiuc.edu
>>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>> 
>