[llvm-commits] [llvm] r71485 - in /llvm/trunk: lib/Transforms/Scalar/LoopStrengthReduce.cpp test/CodeGen/X86/lsr-loop-exit-cond.ll test/CodeGen/X86/lsr-negative-stride.ll test/CodeGen/X86/remat-mov-1.ll test/CodeGen/X86/remat-mov0.ll

Evan Cheng evan.cheng at apple.com
Mon May 11 15:33:02 PDT 2009


Author: evancheng
Date: Mon May 11 17:33:01 2009
New Revision: 71485

URL: http://llvm.org/viewvc/llvm-project?rev=71485&view=rev
Log:
Teach LSR to optimize more loop exit compares, i.e. change them to use the post-increment IV value. Previously LSR would only optimize compares that are in the loop latch block. Now, if LSR can prove the transformation is safe (and profitable), compares that are not in the latch block can also be changed to use post-increment values.

Also, if the compare is the only use of the IV, LSR will place the IV increment instruction just before the compare instead of in the latch block.
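
To illustrate the idea (this sketch is not part of the commit; the block names, %n, and the adjusted bound %n.plus.1 are hypothetical), consider a loop whose exit compare lives in a block other than the latch. Before this change such a compare had to use the pre-increment IV; with it, LSR may insert the increment just before its only use and rewrite the compare against the post-increment value, adjusting the bound by one stride:

    ; Before: the exit compare in a non-latch block uses the pre-increment IV,
    ; so both %iv and %iv.next are live across the exit test.
    bb:                                             ; exiting block (not the latch)
      %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
      %exitcond = icmp eq i64 %iv, %n
      br i1 %exitcond, label %exit, label %latch
    latch:
      %iv.next = add i64 %iv, 1
      br label %bb

    ; After (conceptually): the increment is placed just before the compare,
    ; which now tests the post-increment value against a bound offset by one
    ; stride, allowing the IV live ranges to be coalesced.
    bb:
      %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
      %iv.next = add i64 %iv, 1
      %exitcond = icmp eq i64 %iv.next, %n.plus.1
      br i1 %exitcond, label %exit, label %latch
    latch:
      br label %bb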

Added:
    llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll
    llvm/trunk/test/CodeGen/X86/remat-mov-1.ll
      - copied, changed from r71444, llvm/trunk/test/CodeGen/X86/remat-mov0.ll
Removed:
    llvm/trunk/test/CodeGen/X86/remat-mov0.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp
    llvm/trunk/test/CodeGen/X86/lsr-negative-stride.ll

Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=71485&r1=71484&r2=71485&view=diff

==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Mon May 11 17:33:01 2009
@@ -43,6 +43,7 @@
 STATISTIC(NumEliminated,  "Number of strides eliminated");
 STATISTIC(NumShadow,      "Number of Shadow IVs optimized");
 STATISTIC(NumImmSunk,     "Number of common expr immediates sunk into uses");
+STATISTIC(NumLoopCond,    "Number of loop terminating conds optimized");
 
 static cl::opt<bool> EnableFullLSRMode("enable-full-lsr",
                                        cl::init(false),
@@ -122,6 +123,10 @@
     /// particular stride.
     std::map<SCEVHandle, IVsOfOneStride> IVsByStride;
 
+    /// StrideNoReuse - Keep track of all the strides whose ivs cannot be
+    /// reused (nor should they be rewritten to reuse other strides).
+    SmallSet<SCEVHandle, 4> StrideNoReuse;
+
     /// StrideOrder - An ordering of the keys in IVUsesByStride that is stable:
     /// We use this to iterate over the IVUsesByStride collection without being
     /// dependent on random ordering of pointers in the process.
@@ -184,8 +189,8 @@
     SCEVHandle CheckForIVReuse(bool, bool, bool, const SCEVHandle&,
                              IVExpr&, const Type*,
                              const std::vector<BasedUser>& UsersToProcess);
-    bool ValidStride(bool, int64_t,
-                     const std::vector<BasedUser>& UsersToProcess);
+    bool ValidScale(bool, int64_t,
+                    const std::vector<BasedUser>& UsersToProcess);
     SCEVHandle CollectIVUsers(const SCEVHandle &Stride,
                               IVUsersOfOneStride &Uses,
                               Loop *L,
@@ -213,6 +218,7 @@
                                   SCEVHandle Stride,
                                   SCEVHandle CommonExprs,
                                   Value *CommonBaseV,
+                                  Instruction *IVIncInsertPt,
                                   const Loop *L,
                                   SCEVExpander &PreheaderRewriter);
     void StrengthReduceStridedIVUsers(const SCEVHandle &Stride,
@@ -799,7 +805,7 @@
 /// MoveLoopVariantsToImmediateField - Move any subexpressions from Val that are
 /// loop varying to the Imm operand.
 static void MoveLoopVariantsToImmediateField(SCEVHandle &Val, SCEVHandle &Imm,
-                                            Loop *L, ScalarEvolution *SE) {
+                                             Loop *L, ScalarEvolution *SE) {
   if (Val->isLoopInvariant(L)) return;  // Nothing to do.
   
   if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
@@ -1122,16 +1128,15 @@
   return Result;
 }
 
-/// ValidStride - Check whether the given Scale is valid for all loads and 
+/// ValidScale - Check whether the given Scale is valid for all loads and 
 /// stores in UsersToProcess.
 ///
-bool LoopStrengthReduce::ValidStride(bool HasBaseReg,
-                               int64_t Scale, 
+bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale,
                                const std::vector<BasedUser>& UsersToProcess) {
   if (!TLI)
     return true;
 
-  for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) {
+  for (unsigned i = 0, e = UsersToProcess.size(); i!=e; ++i) {
     // If this is a load or other access, pass the type of the access in.
     const Type *AccessTy = Type::VoidTy;
     if (isAddressUse(UsersToProcess[i].Inst,
@@ -1186,13 +1191,17 @@
                                 const SCEVHandle &Stride, 
                                 IVExpr &IV, const Type *Ty,
                                 const std::vector<BasedUser>& UsersToProcess) {
+  if (StrideNoReuse.count(Stride))
+    return SE->getIntegerSCEV(0, Stride->getType());
+
   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) {
     int64_t SInt = SC->getValue()->getSExtValue();
     for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e;
          ++NewStride) {
       std::map<SCEVHandle, IVsOfOneStride>::iterator SI = 
                 IVsByStride.find(StrideOrder[NewStride]);
-      if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
+      if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first) ||
+          StrideNoReuse.count(SI->first))
         continue;
       int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
       if (SI->first != Stride &&
@@ -1206,7 +1215,7 @@
       // multiplications.
       if (Scale == 1 ||
           (AllUsesAreAddresses &&
-           ValidStride(HasBaseReg, Scale, UsersToProcess)))
+           ValidScale(HasBaseReg, Scale, UsersToProcess)))
         for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
                IE = SI->second.IVs.end(); II != IE; ++II)
           // FIXME: Only handle base == 0 for now.
@@ -1302,7 +1311,7 @@
     // field of the use, so that we don't try to use something before it is
     // computed.
     MoveLoopVariantsToImmediateField(UsersToProcess.back().Base,
-                                    UsersToProcess.back().Imm, L, SE);
+                                     UsersToProcess.back().Imm, L, SE);
     assert(UsersToProcess.back().Base->isLoopInvariant(L) &&
            "Base value is not loop invariant!");
   }
@@ -1452,6 +1461,7 @@
 /// Return the created phi node.
 ///
 static PHINode *InsertAffinePhi(SCEVHandle Start, SCEVHandle Step,
+                                Instruction *IVIncInsertPt,
                                 const Loop *L,
                                 SCEVExpander &Rewriter) {
   assert(Start->isLoopInvariant(L) && "New PHI start is not loop invariant!");
@@ -1475,16 +1485,17 @@
     IncAmount = Rewriter.SE.getNegativeSCEV(Step);
 
   // Insert an add instruction right before the terminator corresponding
-  // to the back-edge.
+  // to the back-edge or just before the only use. The location is determined
+  // by the caller and passed in as IVIncInsertPt.
   Value *StepV = Rewriter.expandCodeFor(IncAmount, Ty,
                                         Preheader->getTerminator());
   Instruction *IncV;
   if (isNegative) {
     IncV = BinaryOperator::CreateSub(PN, StepV, "lsr.iv.next",
-                                     LatchBlock->getTerminator());
+                                     IVIncInsertPt);
   } else {
     IncV = BinaryOperator::CreateAdd(PN, StepV, "lsr.iv.next",
-                                     LatchBlock->getTerminator());
+                                     IVIncInsertPt);
   }
   if (!isa<ConstantInt>(StepV)) ++NumVariable;
 
@@ -1541,6 +1552,7 @@
 
   // Rewrite the UsersToProcess records, creating a separate PHI for each
   // unique Base value.
+  Instruction *IVIncInsertPt = L->getLoopLatch()->getTerminator();
   for (unsigned i = 0, e = UsersToProcess.size(); i != e; ) {
     // TODO: The uses are grouped by base, but not sorted. We arbitrarily
     // pick the first Imm value here to start with, and adjust it for the
@@ -1548,7 +1560,7 @@
     SCEVHandle Imm = UsersToProcess[i].Imm;
     SCEVHandle Base = UsersToProcess[i].Base;
     SCEVHandle Start = SE->getAddExpr(CommonExprs, Base, Imm);
-    PHINode *Phi = InsertAffinePhi(Start, Stride, L,
+    PHINode *Phi = InsertAffinePhi(Start, Stride, IVIncInsertPt, L,
                                    PreheaderRewriter);
     // Loop over all the users with the same base.
     do {
@@ -1561,6 +1573,18 @@
   }
 }
 
+/// FindIVIncInsertPt - Return the location to insert the increment instruction.
+/// If the only use is a use of the postinc value (it must be the loop
+/// termination condition), then insert it just before the use.
+static Instruction *FindIVIncInsertPt(std::vector<BasedUser> &UsersToProcess,
+                                      const Loop *L) {
+  if (UsersToProcess.size() == 1 &&
+      UsersToProcess[0].isUseOfPostIncrementedValue &&
+      L->contains(UsersToProcess[0].Inst->getParent()))
+    return UsersToProcess[0].Inst;
+  return L->getLoopLatch()->getTerminator();
+}
+
 /// PrepareToStrengthReduceWithNewPhi - Insert a new induction variable for the
 /// given users to share.
 ///
@@ -1570,12 +1594,13 @@
                                          SCEVHandle Stride,
                                          SCEVHandle CommonExprs,
                                          Value *CommonBaseV,
+                                         Instruction *IVIncInsertPt,
                                          const Loop *L,
                                          SCEVExpander &PreheaderRewriter) {
   DOUT << "  Inserting new PHI:\n";
 
   PHINode *Phi = InsertAffinePhi(SE->getUnknown(CommonBaseV),
-                                 Stride, L,
+                                 Stride, IVIncInsertPt, L,
                                  PreheaderRewriter);
 
   // Remember this in case a later stride is multiple of this.
@@ -1590,8 +1615,8 @@
   DOUT << "\n";
 }
 
-/// PrepareToStrengthReduceWithNewPhi - Prepare for the given users to reuse
-/// an induction variable with a stride that is a factor of the current
+/// PrepareToStrengthReduceFromSmallerStride - Prepare for the given users to
+/// reuse an induction variable with a stride that is a factor of the current
 /// induction variable.
 ///
 void
@@ -1727,6 +1752,7 @@
   BasicBlock  *Preheader = L->getLoopPreheader();
   Instruction *PreInsertPt = Preheader->getTerminator();
   BasicBlock *LatchBlock = L->getLoopLatch();
+  Instruction *IVIncInsertPt = LatchBlock->getTerminator();
 
   Value *CommonBaseV = Constant::getNullValue(ReplacedTy);
 
@@ -1755,13 +1781,15 @@
                                     AllUsesAreOutsideLoop,
                                     Stride, ReuseIV, ReplacedTy,
                                     UsersToProcess);
-    if (isa<SCEVConstant>(RewriteFactor) &&
-        cast<SCEVConstant>(RewriteFactor)->isZero())
-      PrepareToStrengthReduceWithNewPhi(UsersToProcess, Stride, CommonExprs,
-                                        CommonBaseV, L, PreheaderRewriter);
-    else
+    if (!RewriteFactor->isZero())
       PrepareToStrengthReduceFromSmallerStride(UsersToProcess, CommonBaseV,
                                                ReuseIV, PreInsertPt);
+    else {
+      IVIncInsertPt = FindIVIncInsertPt(UsersToProcess, L);
+      PrepareToStrengthReduceWithNewPhi(UsersToProcess, Stride, CommonExprs,
+                                        CommonBaseV, IVIncInsertPt,
+                                        L, PreheaderRewriter);
+    }
   }
 
   // Process all the users now, replacing their strided uses with
@@ -1800,7 +1828,12 @@
       // FIXME: Use emitted users to emit other users.
       BasedUser &User = UsersToProcess.back();
 
-      DOUT << "    Examining use ";
+      DOUT << "    Examining ";
+      if (User.isUseOfPostIncrementedValue)
+        DOUT << "postinc";
+      else
+        DOUT << "preinc";
+      DOUT << " use ";
       DEBUG(WriteAsOperand(*DOUT, UsersToProcess.back().OperandValToReplace,
                            /*PrintType=*/false));
       DOUT << " in Inst: " << *(User.Inst);
@@ -1810,11 +1843,12 @@
       Value *RewriteOp = User.Phi;
       if (User.isUseOfPostIncrementedValue) {
         RewriteOp = User.Phi->getIncomingValueForBlock(LatchBlock);
-
         // If this user is in the loop, make sure it is the last thing in the
-        // loop to ensure it is dominated by the increment.
-        if (L->contains(User.Inst->getParent()))
-          User.Inst->moveBefore(LatchBlock->getTerminator());
+        // loop to ensure it is dominated by the increment. In case it's the
+        // only use of the iv, the increment instruction is already before the
+        // use.
+        if (L->contains(User.Inst->getParent()) && User.Inst != IVIncInsertPt)
+          User.Inst->moveBefore(IVIncInsertPt);
       }
 
       SCEVHandle RewriteExpr = SE->getUnknown(RewriteOp);
@@ -2085,7 +2119,7 @@
       // if it's likely the new stride uses will be rewritten using the
       // stride of the compare instruction.
       if (AllUsesAreAddresses &&
-          ValidStride(!CommonExprs->isZero(), Scale, UsersToProcess))
+          ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
         continue;
 
       // If scale is negative, use swapped predicate unless it's testing
@@ -2304,8 +2338,8 @@
       if (!DestTy) continue;
 
       if (TLI) {
-        /* If target does not support DestTy natively then do not apply
-           this transformation. */
+        // If target does not support DestTy natively then do not apply
+        // this transformation.
         MVT DVT = TLI->getValueType(DestTy);
         if (!TLI->isTypeLegal(DVT)) continue;
       }
@@ -2380,8 +2414,6 @@
   // TODO: implement optzns here.
 
   OptimizeShadowIV(L);
-
-  OptimizeLoopTermCond(L);
 }
 
 /// OptimizeLoopTermCond - Change loop terminating condition to use the 
@@ -2391,23 +2423,78 @@
   // can, we want to change it to use a post-incremented version of its
   // induction variable, to allow coalescing the live ranges for the IV into
   // one register value.
-  PHINode *SomePHI = cast<PHINode>(L->getHeader()->begin());
-  BasicBlock  *Preheader = L->getLoopPreheader();
-  BasicBlock *LatchBlock =
-   SomePHI->getIncomingBlock(SomePHI->getIncomingBlock(0) == Preheader);
-  BranchInst *TermBr = dyn_cast<BranchInst>(LatchBlock->getTerminator());
-  if (!TermBr || TermBr->isUnconditional() || 
-      !isa<ICmpInst>(TermBr->getCondition()))
+  BasicBlock *LatchBlock = L->getLoopLatch();
+  BasicBlock *ExitBlock = L->getExitingBlock();
+  if (!ExitBlock)
+    // Multiple exits, just look at the exit in the latch block if there is one.
+    ExitBlock = LatchBlock;
+  BranchInst *TermBr = dyn_cast<BranchInst>(ExitBlock->getTerminator());
+  if (!TermBr)
+    return;
+  if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
     return;
-  ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
 
   // Search IVUsesByStride to find Cond's IVUse if there is one.
   IVStrideUse *CondUse = 0;
   const SCEVHandle *CondStride = 0;
-
+  ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
   if (!FindIVUserForCond(Cond, CondUse, CondStride))
     return; // setcc doesn't use the IV.
 
+  if (ExitBlock != LatchBlock) {
+    if (!Cond->hasOneUse())
+      // See below, we don't want the condition to be cloned.
+      return;
+
+    // If exiting block is the latch block, we know it's safe and profitable to
+    // transform the icmp to use post-inc iv. Otherwise do so only if it would
+    // not reuse another iv and its iv would be reused by other uses. We are
+    // optimizing for the case where the icmp is the only use of the iv.
+    IVUsersOfOneStride &StrideUses = IVUsesByStride[*CondStride];
+    for (unsigned i = 0, e = StrideUses.Users.size(); i != e; ++i) {
+      if (StrideUses.Users[i].User == Cond)
+        continue;
+      if (!StrideUses.Users[i].isUseOfPostIncrementedValue)
+        return;
+    }
+
+    // FIXME: This is expensive, and worse still ChangeCompareStride does a
+    // similar check. Can we perform all the icmp related transformations after
+    // StrengthReduceStridedIVUsers?
+    if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride)) {
+      int64_t SInt = SC->getValue()->getSExtValue();
+      for (unsigned NewStride = 0, ee = StrideOrder.size(); NewStride != ee;
+           ++NewStride) {
+        std::map<SCEVHandle, IVUsersOfOneStride>::iterator SI = 
+          IVUsesByStride.find(StrideOrder[NewStride]);
+        if (!isa<SCEVConstant>(SI->first) || SI->first == *CondStride)
+          continue;
+        int64_t SSInt =
+          cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
+        if (SSInt == SInt)
+          return; // This can definitely be reused.
+        if (unsigned(abs(SSInt)) < SInt || (SSInt % SInt) != 0)
+          continue;
+        int64_t Scale = SSInt / SInt;
+        bool AllUsesAreAddresses = true;
+        bool AllUsesAreOutsideLoop = true;
+        std::vector<BasedUser> UsersToProcess;
+        SCEVHandle CommonExprs = CollectIVUsers(SI->first, SI->second, L,
+                                                AllUsesAreAddresses,
+                                                AllUsesAreOutsideLoop,
+                                                UsersToProcess);
+        // Avoid rewriting the compare instruction with an iv of new stride
+        // if it's likely the new stride uses will be rewritten using the
+        // stride of the compare instruction.
+        if (AllUsesAreAddresses &&
+            ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
+          return;
+      }
+    }
+
+    StrideNoReuse.insert(*CondStride);
+  }
+
   // If the trip count is computed in terms of an smax (due to ScalarEvolution
   // being unable to find a sufficient guard, for example), change the loop
   // comparison to use SLT instead of NE.
@@ -2415,7 +2502,8 @@
 
   // If possible, change stride and operands of the compare instruction to
   // eliminate one stride.
-  Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
+  if (ExitBlock == LatchBlock)
+    Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
 
   // It's possible for the setcc instruction to be anywhere in the loop, and
   // possible for it to have multiple users.  If it is not immediately before
@@ -2431,7 +2519,7 @@
       
       // Clone the IVUse, as the old use still exists!
       IVUsesByStride[*CondStride].addUser(CondUse->Offset, Cond,
-                                         CondUse->OperandValToReplace);
+                                          CondUse->OperandValToReplace);
       CondUse = &IVUsesByStride[*CondStride].Users.back();
     }
   }
@@ -2442,6 +2530,8 @@
   CondUse->Offset = SE->getMinusSCEV(CondUse->Offset, *CondStride);
   CondUse->isUseOfPostIncrementedValue = true;
   Changed = true;
+
+  ++NumLoopCond;
 }
 
 // OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
@@ -2582,6 +2672,11 @@
     // computation of some other indvar to decide when to terminate the loop.
     OptimizeIndvars(L);
 
+    // Change loop terminating condition to use the postinc iv when possible
+    // and optimize loop terminating compare. FIXME: Move this after
+    // StrengthReduceStridedIVUsers?
+    OptimizeLoopTermCond(L);
+
     // FIXME: We can shrink overlarge IV's here.  e.g. if the code has
     // computation in i64 values and the target doesn't support i64, demote
     // the computation to 32-bit if safe.
@@ -2616,6 +2711,7 @@
   IVUsesByStride.clear();
   IVsByStride.clear();
   StrideOrder.clear();
+  StrideNoReuse.clear();
 
   // Clean up after ourselves
   if (!DeadInsts.empty())

Added: llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll?rev=71485&view=auto

==============================================================================
--- llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll (added)
+++ llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll Mon May 11 17:33:01 2009
@@ -0,0 +1,134 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | %prcontext decq 1 | grep jne
+
+ at Te0 = external global [256 x i32]		; <[256 x i32]*> [#uses=5]
+ at Te1 = external global [256 x i32]		; <[256 x i32]*> [#uses=4]
+ at Te3 = external global [256 x i32]		; <[256 x i32]*> [#uses=2]
+
+define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r) nounwind ssp {
+entry:
+	%0 = load i32* %rk, align 4		; <i32> [#uses=1]
+	%1 = getelementptr i32* %rk, i64 1		; <i32*> [#uses=1]
+	%2 = load i32* %1, align 4		; <i32> [#uses=1]
+	%tmp15 = add i32 %r, -1		; <i32> [#uses=1]
+	%tmp.16 = zext i32 %tmp15 to i64		; <i64> [#uses=2]
+	br label %bb
+
+bb:		; preds = %bb1, %entry
+	%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %bb1 ]		; <i64> [#uses=3]
+	%s1.0 = phi i32 [ %2, %entry ], [ %56, %bb1 ]		; <i32> [#uses=2]
+	%s0.0 = phi i32 [ %0, %entry ], [ %43, %bb1 ]		; <i32> [#uses=2]
+	%tmp18 = shl i64 %indvar, 4		; <i64> [#uses=4]
+	%rk26 = bitcast i32* %rk to i8*		; <i8*> [#uses=6]
+	%3 = lshr i32 %s0.0, 24		; <i32> [#uses=1]
+	%4 = zext i32 %3 to i64		; <i64> [#uses=1]
+	%5 = getelementptr [256 x i32]* @Te0, i64 0, i64 %4		; <i32*> [#uses=1]
+	%6 = load i32* %5, align 4		; <i32> [#uses=1]
+	%7 = lshr i32 %s1.0, 16		; <i32> [#uses=1]
+	%8 = and i32 %7, 255		; <i32> [#uses=1]
+	%9 = zext i32 %8 to i64		; <i64> [#uses=1]
+	%10 = getelementptr [256 x i32]* @Te1, i64 0, i64 %9		; <i32*> [#uses=1]
+	%11 = load i32* %10, align 4		; <i32> [#uses=1]
+	%ctg2.sum2728 = or i64 %tmp18, 8		; <i64> [#uses=1]
+	%12 = getelementptr i8* %rk26, i64 %ctg2.sum2728		; <i8*> [#uses=1]
+	%13 = bitcast i8* %12 to i32*		; <i32*> [#uses=1]
+	%14 = load i32* %13, align 4		; <i32> [#uses=1]
+	%15 = xor i32 %11, %6		; <i32> [#uses=1]
+	%16 = xor i32 %15, %14		; <i32> [#uses=3]
+	%17 = lshr i32 %s1.0, 24		; <i32> [#uses=1]
+	%18 = zext i32 %17 to i64		; <i64> [#uses=1]
+	%19 = getelementptr [256 x i32]* @Te0, i64 0, i64 %18		; <i32*> [#uses=1]
+	%20 = load i32* %19, align 4		; <i32> [#uses=1]
+	%21 = and i32 %s0.0, 255		; <i32> [#uses=1]
+	%22 = zext i32 %21 to i64		; <i64> [#uses=1]
+	%23 = getelementptr [256 x i32]* @Te3, i64 0, i64 %22		; <i32*> [#uses=1]
+	%24 = load i32* %23, align 4		; <i32> [#uses=1]
+	%ctg2.sum2930 = or i64 %tmp18, 12		; <i64> [#uses=1]
+	%25 = getelementptr i8* %rk26, i64 %ctg2.sum2930		; <i8*> [#uses=1]
+	%26 = bitcast i8* %25 to i32*		; <i32*> [#uses=1]
+	%27 = load i32* %26, align 4		; <i32> [#uses=1]
+	%28 = xor i32 %24, %20		; <i32> [#uses=1]
+	%29 = xor i32 %28, %27		; <i32> [#uses=4]
+	%30 = lshr i32 %16, 24		; <i32> [#uses=1]
+	%31 = zext i32 %30 to i64		; <i64> [#uses=1]
+	%32 = getelementptr [256 x i32]* @Te0, i64 0, i64 %31		; <i32*> [#uses=1]
+	%33 = load i32* %32, align 4		; <i32> [#uses=2]
+	%exitcond = icmp eq i64 %indvar, %tmp.16		; <i1> [#uses=1]
+	br i1 %exitcond, label %bb2, label %bb1
+
+bb1:		; preds = %bb
+	%ctg2.sum31 = add i64 %tmp18, 16		; <i64> [#uses=1]
+	%34 = getelementptr i8* %rk26, i64 %ctg2.sum31		; <i8*> [#uses=1]
+	%35 = bitcast i8* %34 to i32*		; <i32*> [#uses=1]
+	%36 = lshr i32 %29, 16		; <i32> [#uses=1]
+	%37 = and i32 %36, 255		; <i32> [#uses=1]
+	%38 = zext i32 %37 to i64		; <i64> [#uses=1]
+	%39 = getelementptr [256 x i32]* @Te1, i64 0, i64 %38		; <i32*> [#uses=1]
+	%40 = load i32* %39, align 4		; <i32> [#uses=1]
+	%41 = load i32* %35, align 4		; <i32> [#uses=1]
+	%42 = xor i32 %40, %33		; <i32> [#uses=1]
+	%43 = xor i32 %42, %41		; <i32> [#uses=1]
+	%44 = lshr i32 %29, 24		; <i32> [#uses=1]
+	%45 = zext i32 %44 to i64		; <i64> [#uses=1]
+	%46 = getelementptr [256 x i32]* @Te0, i64 0, i64 %45		; <i32*> [#uses=1]
+	%47 = load i32* %46, align 4		; <i32> [#uses=1]
+	%48 = and i32 %16, 255		; <i32> [#uses=1]
+	%49 = zext i32 %48 to i64		; <i64> [#uses=1]
+	%50 = getelementptr [256 x i32]* @Te3, i64 0, i64 %49		; <i32*> [#uses=1]
+	%51 = load i32* %50, align 4		; <i32> [#uses=1]
+	%ctg2.sum32 = add i64 %tmp18, 20		; <i64> [#uses=1]
+	%52 = getelementptr i8* %rk26, i64 %ctg2.sum32		; <i8*> [#uses=1]
+	%53 = bitcast i8* %52 to i32*		; <i32*> [#uses=1]
+	%54 = load i32* %53, align 4		; <i32> [#uses=1]
+	%55 = xor i32 %51, %47		; <i32> [#uses=1]
+	%56 = xor i32 %55, %54		; <i32> [#uses=1]
+	%indvar.next = add i64 %indvar, 1		; <i64> [#uses=1]
+	br label %bb
+
+bb2:		; preds = %bb
+	%tmp10 = shl i64 %tmp.16, 4		; <i64> [#uses=2]
+	%ctg2.sum = add i64 %tmp10, 16		; <i64> [#uses=1]
+	%tmp1213 = getelementptr i8* %rk26, i64 %ctg2.sum		; <i8*> [#uses=1]
+	%57 = bitcast i8* %tmp1213 to i32*		; <i32*> [#uses=1]
+	%58 = and i32 %33, -16777216		; <i32> [#uses=1]
+	%59 = lshr i32 %29, 16		; <i32> [#uses=1]
+	%60 = and i32 %59, 255		; <i32> [#uses=1]
+	%61 = zext i32 %60 to i64		; <i64> [#uses=1]
+	%62 = getelementptr [256 x i32]* @Te1, i64 0, i64 %61		; <i32*> [#uses=1]
+	%63 = load i32* %62, align 4		; <i32> [#uses=1]
+	%64 = and i32 %63, 16711680		; <i32> [#uses=1]
+	%65 = or i32 %64, %58		; <i32> [#uses=1]
+	%66 = load i32* %57, align 4		; <i32> [#uses=1]
+	%67 = xor i32 %65, %66		; <i32> [#uses=2]
+	%68 = lshr i32 %29, 8		; <i32> [#uses=1]
+	%69 = zext i32 %68 to i64		; <i64> [#uses=1]
+	%70 = getelementptr [256 x i32]* @Te0, i64 0, i64 %69		; <i32*> [#uses=1]
+	%71 = load i32* %70, align 4		; <i32> [#uses=1]
+	%72 = and i32 %71, -16777216		; <i32> [#uses=1]
+	%73 = and i32 %16, 255		; <i32> [#uses=1]
+	%74 = zext i32 %73 to i64		; <i64> [#uses=1]
+	%75 = getelementptr [256 x i32]* @Te1, i64 0, i64 %74		; <i32*> [#uses=1]
+	%76 = load i32* %75, align 4		; <i32> [#uses=1]
+	%77 = and i32 %76, 16711680		; <i32> [#uses=1]
+	%78 = or i32 %77, %72		; <i32> [#uses=1]
+	%ctg2.sum25 = add i64 %tmp10, 20		; <i64> [#uses=1]
+	%79 = getelementptr i8* %rk26, i64 %ctg2.sum25		; <i8*> [#uses=1]
+	%80 = bitcast i8* %79 to i32*		; <i32*> [#uses=1]
+	%81 = load i32* %80, align 4		; <i32> [#uses=1]
+	%82 = xor i32 %78, %81		; <i32> [#uses=2]
+	%83 = lshr i32 %67, 24		; <i32> [#uses=1]
+	%84 = trunc i32 %83 to i8		; <i8> [#uses=1]
+	store i8 %84, i8* %out, align 1
+	%85 = lshr i32 %67, 16		; <i32> [#uses=1]
+	%86 = trunc i32 %85 to i8		; <i8> [#uses=1]
+	%87 = getelementptr i8* %out, i64 1		; <i8*> [#uses=1]
+	store i8 %86, i8* %87, align 1
+	%88 = getelementptr i8* %out, i64 4		; <i8*> [#uses=1]
+	%89 = lshr i32 %82, 24		; <i32> [#uses=1]
+	%90 = trunc i32 %89 to i8		; <i8> [#uses=1]
+	store i8 %90, i8* %88, align 1
+	%91 = lshr i32 %82, 16		; <i32> [#uses=1]
+	%92 = trunc i32 %91 to i8		; <i8> [#uses=1]
+	%93 = getelementptr i8* %out, i64 5		; <i8*> [#uses=1]
+	store i8 %92, i8* %93, align 1
+	ret void
+}

Modified: llvm/trunk/test/CodeGen/X86/lsr-negative-stride.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lsr-negative-stride.ll?rev=71485&r1=71484&r2=71485&view=diff

==============================================================================
--- llvm/trunk/test/CodeGen/X86/lsr-negative-stride.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lsr-negative-stride.ll Mon May 11 17:33:01 2009
@@ -16,7 +16,7 @@
 ;}
 
 
-define i32 @t(i32 %a, i32 %b) {
+define i32 @t(i32 %a, i32 %b) nounwind {
 entry:
 	%tmp1434 = icmp eq i32 %a, %b		; <i1> [#uses=1]
 	br i1 %tmp1434, label %bb17, label %bb.outer

Copied: llvm/trunk/test/CodeGen/X86/remat-mov-1.ll (from r71444, llvm/trunk/test/CodeGen/X86/remat-mov0.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/remat-mov-1.ll?p2=llvm/trunk/test/CodeGen/X86/remat-mov-1.ll&p1=llvm/trunk/test/CodeGen/X86/remat-mov0.ll&r1=71444&r2=71485&rev=71485&view=diff

==============================================================================
--- llvm/trunk/test/CodeGen/X86/remat-mov0.ll (original)
+++ llvm/trunk/test/CodeGen/X86/remat-mov-1.ll Mon May 11 17:33:01 2009
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep xor | count 2
+; RUN: llvm-as < %s | llc -march=x86 | grep 4294967295 | grep mov | count 2
 
 	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
 	%struct.ImgT = type { i8, i8*, i8*, %struct.FILE*, i32, i32, i32, i32, i8*, double*, float*, float*, float*, i32*, double, double, i32*, double*, i32*, i32* }

Removed: llvm/trunk/test/CodeGen/X86/remat-mov0.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/remat-mov0.ll?rev=71484&view=auto

==============================================================================
--- llvm/trunk/test/CodeGen/X86/remat-mov0.ll (original)
+++ llvm/trunk/test/CodeGen/X86/remat-mov0.ll (removed)
@@ -1,40 +0,0 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep xor | count 2
-
-	%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
-	%struct.ImgT = type { i8, i8*, i8*, %struct.FILE*, i32, i32, i32, i32, i8*, double*, float*, float*, float*, i32*, double, double, i32*, double*, i32*, i32* }
-	%struct._CompT = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, i8, %struct._PixT*, %struct._CompT*, i8, %struct._CompT* }
-	%struct._PixT = type { i32, i32, %struct._PixT* }
-	%struct.__sFILEX = type opaque
-	%struct.__sbuf = type { i8*, i32 }
-
-declare fastcc void @MergeComponents(%struct._CompT*, %struct._CompT*, %struct._CompT*, %struct._CompT**, %struct.ImgT*) nounwind 
-
-define fastcc void @MergeToLeft(%struct._CompT* %comp, %struct._CompT** %head, %struct.ImgT* %img) nounwind  {
-entry:
-	br label %bb208
-
-bb105:		; preds = %bb200
-	br i1 false, label %bb197, label %bb149
-
-bb149:		; preds = %bb105
-	%tmp151 = getelementptr %struct._CompT* %comp, i32 0, i32 0		; <i32*> [#uses=1]
-	br label %bb193
-
-bb193:		; preds = %bb184, %bb149
-	%tmp196 = load i32* %tmp151, align 4		; <i32> [#uses=1]
-	br label %bb197
-
-bb197:		; preds = %bb193, %bb105
-	%last_comp.0 = phi i32 [ %tmp196, %bb193 ], [ 0, %bb105 ]		; <i32> [#uses=0]
-	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=1]
-	br label %bb200
-
-bb200:		; preds = %bb208, %bb197
-	%indvar = phi i32 [ 0, %bb208 ], [ %indvar.next, %bb197 ]		; <i32> [#uses=2]
-	%xm.0 = sub i32 %indvar, 0		; <i32> [#uses=1]
-	%tmp202 = icmp slt i32 %xm.0, 1		; <i1> [#uses=1]
-	br i1 %tmp202, label %bb105, label %bb208
-
-bb208:		; preds = %bb200, %entry
-	br label %bb200
-}




