[llvm-commits] [llvm] r164885 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/X86/MergeConsecutiveStores.ll test/CodeGen/X86/loop-strength-reduce-2.ll test/CodeGen/X86/loop-strength-reduce-3.ll test/CodeGen/X86/loop-strength-reduce.ll

Fri Sep 28 23:33:26 PDT 2012

Author: nadav
Date: Sat Sep 29 01:33:25 2012
New Revision: 164885

URL: http://llvm.org/viewvc/llvm-project?rev=164885&view=rev
Log:

A DAGCombine optimization for merging consecutive stores. This optimization is not profitable in many cases
because moden processos can store multiple values in parallel, and preparing the consecutive store requires
some work.  We only handle these cases:

1. Consecutive stores where the values and consecutive loads. For example:
  int a = p->a;
  int b = p->b;
  q->a = a;
  q->b = b;

2. Consecutive stores where the values are constants. Foe example:
  q->a = 4;
  q->b = 5;


Added:
    llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/X86/loop-strength-reduce-2.ll
    llvm/trunk/test/CodeGen/X86/loop-strength-reduce-3.ll
    llvm/trunk/test/CodeGen/X86/loop-strength-reduce.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=164885&r1=164884&r2=164885&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Sat Sep 29 01:33:25 2012
@@ -301,6 +301,10 @@
     /// looking for a better chain (aliasing node.)
     SDValue FindBetterChain(SDNode *N, SDValue Chain);
 
+    /// Merge consecutive store operations into a wide store.
+    /// \return True if some memory operations were changed.
+    bool MergeConsecutiveStores(StoreSDNode *N);
+
   public:
     DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
       : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
@@ -7437,6 +7441,248 @@
   return SDValue();
 }
 
+/// Returns the base pointer and an integer offset from that object.
+static std::pair<SDValue, int64_t> GetPointerBaseAndOffset(SDValue Ptr) {
+  if (Ptr->getOpcode() == ISD::ADD && isa<ConstantSDNode>(Ptr->getOperand(1))) {
+    int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
+    SDValue Base = Ptr->getOperand(0);
+    return std::make_pair(Base, Offset);
+  }
+
+  return std::make_pair(Ptr, 0);
+}
+
+struct ConsecutiveMemoryChainSorter {
+  typedef std::pair<LSBaseSDNode*, int64_t> MemLink;
+  bool operator()(MemLink LHS, MemLink RHS) {
+    return LHS.second < RHS.second;
+  }
+};
+
+bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
+  EVT MemVT = St->getMemoryVT();
+  int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
+
+  // Don't handle vectors.
+  if (MemVT.isVector() || !MemVT.isSimple())
+    return false;
+
+  // Perform an early exit check. Do not bother looking at stored values that
+  // are not constants or loads.
+  SDValue StoredVal = St->getValue();
+  if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) &&
+      !isa<LoadSDNode>(StoredVal))
+    return false;
+
+  // Is this a load-to-store or a const-store.
+  bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
+
+  // Only look at ends of store chains.
+  SDValue Chain = SDValue(St, 1);
+  if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
+    return false;
+
+  // This holds the base pointer and the offset in bytes from the base pointer.
+  std::pair<SDValue, int64_t> BasePtr =
+      GetPointerBaseAndOffset(St->getBasePtr());
+
+  // We must have a base and an offset.
+  if (!BasePtr.first.getNode())
+    return false;
+
+  // Do not handle stores to undef base pointers.
+  if (BasePtr.first.getOpcode() == ISD::UNDEF)
+    return false;
+
+  SmallVector<std::pair<StoreSDNode*, int64_t>, 8> StoreNodes;
+  // Walk up the chain and look for nodes with offsets from the same
+  // base pointer. Stop when reaching an instruction with a different kind
+  // or instruction which has a different base pointer.
+  StoreSDNode *Index = St;
+  while (Index) {
+    // If the chain has more than one use, then we can't reorder the mem ops.
+    if (Index != St && !SDValue(Index, 1)->hasOneUse())
+      break;
+
+    // Find the base pointer and offset for this memory node.
+    std::pair<SDValue, int64_t> Ptr =
+      GetPointerBaseAndOffset(Index->getBasePtr());
+
+    // Check that the base pointer is the same as the original one.
+    if (Ptr.first.getNode() != BasePtr.first.getNode())
+      break;
+
+    // Check that the alignment is the same.
+    if (Index->getAlignment() != St->getAlignment())
+      break;
+
+    // The memory operands must not be volatile.
+    if (Index->isVolatile() || Index->isIndexed())
+      break;
+
+    // No truncation.
+    if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index))
+      if (St->isTruncatingStore())
+        break;
+
+    // The stored memory type must be the same.
+    if (Index->getMemoryVT() != MemVT)
+      break;
+
+    // We found a potential memory operand to merge.
+    StoreNodes.push_back(std::make_pair(Index,Ptr.second));
+
+   // Move up the chain to the next memory operation.
+    Index = dyn_cast<StoreSDNode>(Index->getChain().getNode());
+  }
+
+  // Check if there is anything to merge.
+  if (StoreNodes.size() < 2)
+    return false;
+
+  // Remember which node is the earliest node in the chain.
+  LSBaseSDNode *EarliestOp = StoreNodes.back().first;
+
+  // Sort the memory operands according to their distance from the base pointer.
+  std::sort(StoreNodes.begin(), StoreNodes.end(),
+            ConsecutiveMemoryChainSorter());
+
+  // Scan the memory operations on the chain and find the first non-consecutive
+  // store memory address.
+  unsigned LastConsecutiveStore = 0;
+  int64_t StartAddress = StoreNodes[0].second;
+  for (unsigned i=1; i<StoreNodes.size(); ++i) {
+    int64_t CurrAddress = StoreNodes[i].second;
+    if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+      break;
+    LastConsecutiveStore = i;
+  }
+
+  // Store the constants into memory as one consecutive store.
+  if (!IsLoadSrc) {
+    unsigned LastConst = 0;
+    for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
+      SDValue StoredVal = StoreNodes[i].first->getValue();
+      bool IsConst = (isa<ConstantSDNode>(StoredVal) || isa<ConstantFPSDNode>(StoredVal));
+      if (!IsConst)
+        break;
+      LastConst = i;
+    }
+    unsigned NumElem = std::min(LastConsecutiveStore + 1, LastConst + 1);
+    if (NumElem < 2)
+      return false;
+
+    EVT JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+    DebugLoc DL = StoreNodes[0].first->getDebugLoc();
+    SmallVector<SDValue, 8> Ops;
+
+    for (unsigned i = 0; i < NumElem ; ++i) {
+      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].first);
+      Ops.push_back(St->getValue());
+    }
+
+    SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL,
+                             JointMemOpVT, &Ops[0], Ops.size());
+
+    SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, BV,
+                                    EarliestOp->getBasePtr(),
+                                    EarliestOp->getPointerInfo(), false, false,
+                                    EarliestOp->getAlignment());
+
+    for (unsigned i = 0; i < NumElem ; ++i) {
+      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].first);
+      CombineTo(St, NewStore);
+    }
+    return true;
+  }
+
+  // Look for load nodes wich are used by the stored values.
+  SmallVector<std::pair<LoadSDNode*, int64_t>, 8> LoadNodes;
+
+  // Find acceptible loads. Loads need to have the same chain (token factor),
+  // must not be zext, volatile, indexed, and they must be consecutive.
+  SDValue LdBasePtr;
+  for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
+    LoadSDNode *Ld = dyn_cast<LoadSDNode>(StoreNodes[i].first->getValue());
+    if (!Ld) break;
+
+    // Loads must only have one use.
+    if (!Ld->hasNUsesOfValue(1, 0))
+      break;
+
+    // Check that the alignment is the same as the stores.
+    if (Ld->getAlignment() != St->getAlignment())
+      break;
+
+    // The memory operands must not be volatile.
+    if (Ld->isVolatile() || Ld->isIndexed())
+      break;
+
+    if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
+      break;
+
+    // The stored memory type must be the same.
+    if (Ld->getMemoryVT() != MemVT)
+      break;
+
+    std::pair<SDValue, int64_t> LdPtr =
+    GetPointerBaseAndOffset(Ld->getBasePtr());
+
+    // If this is not the first ptr that we check.
+    if (LdBasePtr.getNode()) {
+      // The base ptr must be the same,
+      if (LdPtr.first != LdBasePtr)
+        break;
+    } else {
+      LdBasePtr = LdPtr.first;
+    }
+
+    // We found a potential memory operand to merge.
+    LoadNodes.push_back(std::make_pair(Ld, LdPtr.second));
+  }
+
+  if (LoadNodes.size() < 2)
+    return false;
+
+  // Scan the memory operations on the chain and find the first non-consecutive
+  // load memory address.
+  unsigned LastConsecutiveLoad = 0;
+  StartAddress = LoadNodes[0].second;
+  for (unsigned i=1; i<LoadNodes.size(); ++i) {
+    int64_t CurrAddress = LoadNodes[i].second;
+    if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+      break;
+    LastConsecutiveLoad = i;
+  }
+
+  unsigned NumElem =
+    std::min(LastConsecutiveStore + 1, LastConsecutiveLoad + 1);
+
+  EVT JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+  DebugLoc LoadDL = LoadNodes[0].first->getDebugLoc();
+  DebugLoc StoreDL = StoreNodes[0].first->getDebugLoc();
+
+  LoadSDNode *FirstLoad = LoadNodes[0].first;
+  SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL,
+                                FirstLoad->getChain(),
+                                FirstLoad->getBasePtr(),
+                                FirstLoad->getPointerInfo(),
+                                false, false, false,
+                                FirstLoad->getAlignment());
+
+  SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad,
+                                  EarliestOp->getBasePtr(),
+                                  EarliestOp->getPointerInfo(), false, false,
+                                  EarliestOp->getAlignment());
+
+  for (unsigned i = 0; i < NumElem ; ++i) {
+    StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].first);
+    CombineTo(St, NewStore);
+  }
+
+  return true;
+}
+
 SDValue DAGCombiner::visitSTORE(SDNode *N) {
   StoreSDNode *ST  = cast<StoreSDNode>(N);
   SDValue Chain = ST->getChain();
@@ -7638,6 +7884,12 @@
                              ST->getAlignment());
   }
 
+
+  // Only perform this optimization before the types are legal, because we
+  // don't want to generate illegal types in this optimization.
+  if (!LegalTypes && MergeConsecutiveStores(ST))
+    return SDValue(N, 0);
+
   return ReduceLoadOpStoreWidth(N);
 }
 

Added: llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll?rev=164885&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll (added)
+++ llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll Sat Sep 29 01:33:25 2012
@@ -0,0 +1,150 @@
+; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }
+
+ at a = common global [10000 x %struct.A] zeroinitializer, align 8
+
+; Move all of the constants using a single vector store.
+; CHECK: merge_const_store
+; CHECK: movq %xmm0
+; CHECK: ret
+define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
+  %1 = icmp sgt i32 %count, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+.lr.ph:
+  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
+  %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]
+  %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
+  store i8 1, i8* %2, align 1
+  %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
+  store i8 2, i8* %3, align 1
+  %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
+  store i8 3, i8* %4, align 1
+  %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
+  store i8 4, i8* %5, align 1
+  %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
+  store i8 5, i8* %6, align 1
+  %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
+  store i8 6, i8* %7, align 1
+  %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
+  store i8 7, i8* %8, align 1
+  %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
+  store i8 8, i8* %9, align 1
+  %10 = add nsw i32 %i.02, 1
+  %11 = getelementptr inbounds %struct.A* %.01, i64 1
+  %exitcond = icmp eq i32 %10, %count
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+._crit_edge:
+  ret void
+}
+
+; Move the first 4 constants as a single vector. Move the rest as scalars.
+; CHECK: merge_nonconst_store
+; CHECK: movd %xmm0
+; CHECK: movb
+; CHECK: movb
+; CHECK: movb
+; CHECK: movb
+; CHECK: ret
+define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp {
+  %1 = icmp sgt i32 %count, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+.lr.ph:
+  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
+  %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]
+  %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
+  store i8 1, i8* %2, align 1
+  %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
+  store i8 2, i8* %3, align 1
+  %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
+  store i8 3, i8* %4, align 1
+  %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
+  store i8 4, i8* %5, align 1
+  %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
+  store i8 %zz, i8* %6, align 1                     ;  <----------- Not a const;
+  %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
+  store i8 6, i8* %7, align 1
+  %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
+  store i8 7, i8* %8, align 1
+  %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
+  store i8 8, i8* %9, align 1
+  %10 = add nsw i32 %i.02, 1
+  %11 = getelementptr inbounds %struct.A* %.01, i64 1
+  %exitcond = icmp eq i32 %10, %count
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+._crit_edge:
+  ret void
+}
+
+
+;CHECK: merge_loads
+; load:
+;CHECK: movw
+; store:
+;CHECK: movw
+;CHECK: ret
+define void @merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
+  %1 = icmp sgt i32 %count, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0
+  %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0
+  %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
+  br label %4
+
+; <label>:4                                       ; preds = %4, %.lr.ph
+  %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
+  %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 ]
+  %5 = load i8* %2, align 1
+  %6 = load i8* %3, align 1
+  %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
+  store i8 %5, i8* %7, align 1
+  %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
+  store i8 %6, i8* %8, align 1
+  %9 = add nsw i32 %i.02, 1
+  %10 = getelementptr inbounds %struct.A* %.01, i64 1
+  %exitcond = icmp eq i32 %9, %count
+  br i1 %exitcond, label %._crit_edge, label %4
+
+._crit_edge:                                      ; preds = %4, %0
+  ret void
+}
+
+; The loads and the stores are interleved. Can't merge them.
+;CHECK: no_merge_loads
+;CHECK: movb
+;CHECK: movb
+;CHECK: movb
+;CHECK: movb
+;CHECK: ret
+define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
+  %1 = icmp sgt i32 %count, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph:                                           ; preds = %0
+  %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0
+  %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
+  br label %a4
+
+a4:                                       ; preds = %4, %.lr.ph
+  %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ]
+  %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ]
+  %a5 = load i8* %2, align 1
+  %a7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
+  store i8 %a5, i8* %a7, align 1
+  %a8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
+  %a6 = load i8* %3, align 1
+  store i8 %a6, i8* %a8, align 1
+  %a9 = add nsw i32 %i.02, 1
+  %a10 = getelementptr inbounds %struct.A* %.01, i64 1
+  %exitcond = icmp eq i32 %a9, %count
+  br i1 %exitcond, label %._crit_edge, label %a4
+
+._crit_edge:                                      ; preds = %4, %0
+  ret void
+}
+
+

Modified: llvm/trunk/test/CodeGen/X86/loop-strength-reduce-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/loop-strength-reduce-2.ll?rev=164885&r1=164884&r2=164885&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/loop-strength-reduce-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/loop-strength-reduce-2.ll Sat Sep 29 01:33:25 2012
@@ -1,20 +1,18 @@
-; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC
-; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s -check-prefix=STATIC
+; RUN: llc < %s -march=x86 -mcpu=corei7 -relocation-model=pic | FileCheck %s -check-prefix=PIC
+; RUN: llc < %s -march=x86 -mcpu=corei7 -relocation-model=static | FileCheck %s -check-prefix=STATIC
 ;
 ; Make sure the common loop invariant A is hoisted up to preheader,
 ; since too many registers are needed to subsume it into the addressing modes.
 ; It's safe to sink A in when it's not pic.
 
 ; PIC:  align
-; PIC:  movl  $4, -4([[REG:%e[a-z]+]])
-; PIC:  movl  $5, ([[REG]])
+; PIC:  movlpd %xmm0, -4([[REG:%e[a-z]+]])
 ; PIC:  addl  $4, [[REG]]
 ; PIC:  decl  {{%e[[a-z]+}}
 ; PIC:  jne
 
 ; STATIC: align
-; STATIC: movl  $4, -4(%ecx)
-; STATIC: movl  $5, (%ecx)
+; STATIC: movlpd %xmm0, -4(%ecx)
 ; STATIC: addl  $4, %ecx
 ; STATIC: decl  %eax
 ; STATIC: jne

Modified: llvm/trunk/test/CodeGen/X86/loop-strength-reduce-3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/loop-strength-reduce-3.ll?rev=164885&r1=164884&r2=164885&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/loop-strength-reduce-3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/loop-strength-reduce-3.ll Sat Sep 29 01:33:25 2012
@@ -1,8 +1,7 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=corei7 -relocation-model=dynamic-no-pic | FileCheck %s
 
 ; CHECK: align
-; CHECK: movl  $4, -4(%ecx)
-; CHECK: movl  $5, (%ecx)
+; CHECK: movlpd %xmm0, -4(%ecx)
 ; CHECK: addl  $4, %ecx
 ; CHECK: decl  %eax
 ; CHECK: jne

Modified: llvm/trunk/test/CodeGen/X86/loop-strength-reduce.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/loop-strength-reduce.ll?rev=164885&r1=164884&r2=164885&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/loop-strength-reduce.ll (original)
+++ llvm/trunk/test/CodeGen/X86/loop-strength-reduce.ll Sat Sep 29 01:33:25 2012
@@ -1,8 +1,7 @@
-; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=corei7 -relocation-model=static | FileCheck %s
 
 ; CHECK: align
-; CHECK: movl  $4, -4(%ecx)
-; CHECK: movl  $5, (%ecx)
+; CHECK: movlpd %xmm0, -4(%ecx)
 ; CHECK: addl  $4, %ecx
 ; CHECK: decl  %eax
 ; CHECK: jne