[llvm-commits] [llvm] r61362 - /llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp

Dale Johannesen dalej at apple.com
Mon Dec 22 18:12:53 PST 2008


Author: johannes
Date: Mon Dec 22 20:12:52 2008
New Revision: 61362

URL: http://llvm.org/viewvc/llvm-project?rev=61362&view=rev
Log:
Fix the time regression I introduced in 464.h264ref with
my last patch to this file.

The issue there was that all uses of an IV inside a loop
are actually references to Base[IV*2], and there was one
use outside that was the same but LSR didn't see the base
or the scaling because it didn't recurse into uses outside
the loop; thus, it used base+IV*scale mode inside the loop
instead of pulling base out of the loop.  This was extra bad
because register pressure later forced both base and IV into
memory.  Doing that recursion, at least enough
to figure out addressing modes, is a good idea in general;
the change in AddUsersIfInteresting does this.  However,
there were side effects....

It is also possible for recursing outside the loop to
introduce another IV where there was only 1 before (if
the refs inside are not scaled and the ref outside is).
I don't think this is a common case, but it's in the testsuite.
It is right to be very aggressive about getting rid of
such introduced IVs (CheckForIVReuse and the handling of
nonzero RewriteFactor in StrengthReduceStridedIVUsers).
In the testcase in question the new IV produced this way
has both a nonconstant stride and a nonzero base, neither
of which was handled before.  Also, when inserting new code
that feeds into a PHI and the original location is outside
the loop (a case that couldn't happen before), it's right to
put that code at the original location rather than in the
PHI's immediate predecessor(s); this avoids making multiple
copies of it (RewriteInstructionToUseNewBase).

Also, the mechanism for keeping SCEVs corresponding to GEPs
no longer works, as the GEP might change after its SCEV
is remembered, invalidating the SCEV, and we might get a bad
SCEV value when looking up the GEP again for a later loop.
This also couldn't happen before, as we weren't recursing
into GEPs outside the loop.  The fix records each such GEP
in GEPlist and deletes its SCEV record when the per-loop
data is cleared.

I owe some testcases for this; I want to get it in for the nightly runs.


Modified:
    llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp

Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=61362&r1=61361&r2=61362&view=diff

==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Mon Dec 22 20:12:52 2008
@@ -130,6 +130,12 @@
     /// dependent on random ordering of pointers in the process.
     SmallVector<SCEVHandle, 16> StrideOrder;
 
+    /// GEPlist - A list of the GEP's that have been remembered in the SCEV
+    /// data structures.  SCEV does not know to update these when the operands
+    /// of the GEP are changed, which means we cannot leave them live across
+    /// loops.
+    SmallVector<GetElementPtrInst *, 16> GEPlist;
+
     /// CastedValues - As we need to cast values to uintptr_t, this keeps track
     /// of the casted version of each value.  This is accessed by
     /// getCastedVersionOf.
@@ -191,7 +197,7 @@
     bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
                            const SCEVHandle *&CondStride);
     bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
-    int64_t CheckForIVReuse(bool, bool, bool, const SCEVHandle&,
+    SCEVHandle CheckForIVReuse(bool, bool, bool, const SCEVHandle&,
                              IVExpr&, const Type*,
                              const std::vector<BasedUser>& UsersToProcess);
     bool ValidStride(bool, int64_t,
@@ -340,6 +346,7 @@
   }
 
   SE->setSCEV(GEP, GEPVal);
+  GEPlist.push_back(GEP);
   return GEPVal;
 }
 
@@ -508,14 +515,22 @@
     if (isa<PHINode>(User) && Processed.count(User))
       continue;
 
-    // If this is an instruction defined in a nested loop, or outside this loop,
-    // don't recurse into it.
+    // Descend recursively, but not into PHI nodes outside the current loop.
+    // It's important to see the entire expression outside the loop to get
+    // choices that depend on addressing mode use right, although we won't
+    // consider references ouside the loop in all cases.
+    // If User is already in Processed, we don't want to recurse into it again,
+    // but do want to record a second reference in the same instruction.
     bool AddUserToIVUsers = false;
     if (LI->getLoopFor(User->getParent()) != L) {
-      DOUT << "FOUND USER in other loop: " << *User
-           << "   OF SCEV: " << *ISE << "\n";
-      AddUserToIVUsers = true;
-    } else if (!AddUsersIfInteresting(User, L, Processed)) {
+      if (isa<PHINode>(User) || Processed.count(User) ||
+          !AddUsersIfInteresting(User, L, Processed)) {
+        DOUT << "FOUND USER in other loop: " << *User
+             << "   OF SCEV: " << *ISE << "\n";
+        AddUserToIVUsers = true;
+      }
+    } else if (Processed.count(User) || 
+               !AddUsersIfInteresting(User, L, Processed)) {
       DOUT << "FOUND USER: " << *User
            << "   OF SCEV: " << *ISE << "\n";
       AddUserToIVUsers = true;
@@ -704,34 +719,45 @@
   PHINode *PN = cast<PHINode>(Inst);
   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
     if (PN->getIncomingValue(i) == OperandValToReplace) {
-      // If this is a critical edge, split the edge so that we do not insert the
-      // code on all predecessor/successor paths.  We do this unless this is the
-      // canonical backedge for this loop, as this can make some inserted code
-      // be in an illegal position.
-      BasicBlock *PHIPred = PN->getIncomingBlock(i);
-      if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 &&
-          (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) {
-        
-        // First step, split the critical edge.
-        SplitCriticalEdge(PHIPred, PN->getParent(), P, false);
-            
-        // Next step: move the basic block.  In particular, if the PHI node
-        // is outside of the loop, and PredTI is in the loop, we want to
-        // move the block to be immediately before the PHI block, not
-        // immediately after PredTI.
-        if (L->contains(PHIPred) && !L->contains(PN->getParent())) {
-          BasicBlock *NewBB = PN->getIncomingBlock(i);
-          NewBB->moveBefore(PN->getParent());
+      // If the original expression is outside the loop, put the replacement
+      // code in the same place as the original expression,
+      // which need not be an immediate predecessor of this PHI.  This way we 
+      // need only one copy of it even if it is referenced multiple times in
+      // the PHI.  We don't do this when the original expression is inside the
+      // loop because multiple copies sometimes do useful sinking of code in that
+      // case(?).
+      Instruction *OldLoc = dyn_cast<Instruction>(OperandValToReplace);
+      if (L->contains(OldLoc->getParent())) {
+        // If this is a critical edge, split the edge so that we do not insert the
+        // code on all predecessor/successor paths.  We do this unless this is the
+        // canonical backedge for this loop, as this can make some inserted code
+        // be in an illegal position.
+        BasicBlock *PHIPred = PN->getIncomingBlock(i);
+        if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 &&
+            (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) {
+
+          // First step, split the critical edge.
+          SplitCriticalEdge(PHIPred, PN->getParent(), P, false);
+
+          // Next step: move the basic block.  In particular, if the PHI node
+          // is outside of the loop, and PredTI is in the loop, we want to
+          // move the block to be immediately before the PHI block, not
+          // immediately after PredTI.
+          if (L->contains(PHIPred) && !L->contains(PN->getParent())) {
+            BasicBlock *NewBB = PN->getIncomingBlock(i);
+            NewBB->moveBefore(PN->getParent());
+          }
+
+          // Splitting the edge can reduce the number of PHI entries we have.
+          e = PN->getNumIncomingValues();
         }
-        
-        // Splitting the edge can reduce the number of PHI entries we have.
-        e = PN->getNumIncomingValues();
       }
-
       Value *&Code = InsertedCode[PN->getIncomingBlock(i)];
       if (!Code) {
         // Insert the code into the end of the predecessor block.
-        Instruction *InsertPt = PN->getIncomingBlock(i)->getTerminator();
+        Instruction *InsertPt = (L->contains(OldLoc->getParent())) ?
+                                PN->getIncomingBlock(i)->getTerminator() :
+                                OldLoc->getParent()->getTerminator();
         Code = InsertCodeForBaseAtPosition(NewBase, Rewriter, InsertPt, L);
 
         // Adjust the type back to match the PHI. Note that we can't use
@@ -1168,7 +1194,11 @@
 /// mode scale component and optional base reg. This allows the users of
 /// this stride to be rewritten as prev iv * factor. It returns 0 if no
 /// reuse is possible.  Factors can be negative on same targets, e.g. ARM.
-int64_t LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
+///
+/// If all uses are outside the loop, we don't require that all multiplies
+/// be folded into the addressing mode; a multiply (executed once) outside
+/// the loop is better than another IV within.  Well, usually.
+SCEVHandle LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
                                 bool AllUsesAreAddresses,
                                 bool AllUsesAreOutsideLoop,
                                 const SCEVHandle &Stride, 
@@ -1180,7 +1210,7 @@
          ++NewStride) {
       std::map<SCEVHandle, IVsOfOneStride>::iterator SI = 
                 IVsByStride.find(StrideOrder[NewStride]);
-      if (SI == IVsByStride.end()) 
+      if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
         continue;
       int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
       if (SI->first != Stride &&
@@ -1202,11 +1232,53 @@
           if (II->Base->isZero() &&
               !RequiresTypeConversion(II->Base->getType(), Ty)) {
             IV = *II;
-            return Scale;
+            return SE->getIntegerSCEV(Scale, Stride->getType());
           }
     }
+  } else if (AllUsesAreOutsideLoop) {
+    // Accept nonconstant strides here; it is really really right to substitute
+    // an existing IV if we can.
+    for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e;
+         ++NewStride) {
+      std::map<SCEVHandle, IVsOfOneStride>::iterator SI = 
+                IVsByStride.find(StrideOrder[NewStride]);
+      if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
+        continue;
+      int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
+      if (SI->first != Stride && SSInt != 1)
+        continue;
+      for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
+             IE = SI->second.IVs.end(); II != IE; ++II)
+        // Accept nonzero base here.
+        // Only reuse previous IV if it would not require a type conversion.
+        if (!RequiresTypeConversion(II->Base->getType(), Ty)) {
+          IV = *II;
+          return Stride;
+        }
+    }
+    // Special case, old IV is -1*x and this one is x.  Can treat this one as
+    // -1*old.
+    for (unsigned NewStride = 0, e = StrideOrder.size(); NewStride != e;
+         ++NewStride) {
+      std::map<SCEVHandle, IVsOfOneStride>::iterator SI = 
+                IVsByStride.find(StrideOrder[NewStride]);
+      if (SI == IVsByStride.end()) 
+        continue;
+      if (SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(SI->first))
+        if (SCEVConstant *SC = dyn_cast<SCEVConstant>(ME->getOperand(0)))
+          if (Stride == ME->getOperand(1) &&
+              SC->getValue()->getSExtValue() == -1LL)
+            for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(),
+                   IE = SI->second.IVs.end(); II != IE; ++II)
+              // Accept nonzero base here.
+              // Only reuse previous IV if it would not require type conversion.
+              if (!RequiresTypeConversion(II->Base->getType(), Ty)) {
+                IV = *II;
+                return SE->getIntegerSCEV(-1LL, Stride->getType());
+              }
+    }
   }
-  return 0;
+  return SE->getIntegerSCEV(0, Stride->getType());
 }
 
 /// PartitionByIsUseOfPostIncrementedValue - Simple boolean predicate that
@@ -1357,12 +1429,13 @@
   IVExpr   ReuseIV(SE->getIntegerSCEV(0, Type::Int32Ty),
                    SE->getIntegerSCEV(0, Type::Int32Ty),
                    0, 0);
-  int64_t RewriteFactor = 0;
-  RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
+  SCEVHandle RewriteFactor = 
+                  CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
                                   AllUsesAreOutsideLoop,
                                   Stride, ReuseIV, CommonExprs->getType(),
                                   UsersToProcess);
-  if (RewriteFactor != 0) {
+  if (!isa<SCEVConstant>(RewriteFactor) || 
+      !cast<SCEVConstant>(RewriteFactor)->isZero()) {
     DOUT << "BASED ON IV of STRIDE " << *ReuseIV.Stride
          << " and BASE " << *ReuseIV.Base << " :\n";
     NewPHI = ReuseIV.PHI;
@@ -1390,7 +1463,8 @@
   Value *CommonBaseV
     = PreheaderRewriter.expandCodeFor(CommonExprs, PreInsertPt);
 
-  if (RewriteFactor == 0) {
+  if (isa<SCEVConstant>(RewriteFactor) &&
+      cast<SCEVConstant>(RewriteFactor)->isZero()) {
     // Create a new Phi for this base, and stick it in the loop header.
     NewPHI = PHINode::Create(ReplacedTy, "iv.", PhiInsertBefore);
     ++NumInserted;
@@ -1537,9 +1611,17 @@
 
       // If we are reusing the iv, then it must be multiplied by a constant
       // factor take advantage of addressing mode scale component.
-      if (RewriteFactor != 0) {
-        RewriteExpr = SE->getMulExpr(SE->getIntegerSCEV(RewriteFactor,
-                                                        RewriteExpr->getType()),
+      if (!isa<SCEVConstant>(RewriteFactor) ||
+          !cast<SCEVConstant>(RewriteFactor)->isZero()) {
+        // If we're reusing an IV with a nonzero base (currently this happens
+        // only when all reuses are outside the loop) subtract that base here.
+        // The base has been used to initialize the PHI node but we don't want
+        // it here.
+        if (!ReuseIV.Base->isZero())
+          RewriteExpr = SE->getMinusSCEV(RewriteExpr, ReuseIV.Base);
+
+        // Multiply old variable, with base removed, by new scale factor.
+        RewriteExpr = SE->getMulExpr(RewriteFactor,
                                      RewriteExpr);
 
         // The common base is emitted in the loop preheader. But since we
@@ -2174,6 +2256,9 @@
   IVUsesByStride.clear();
   IVsByStride.clear();
   StrideOrder.clear();
+  for (unsigned i=0; i<GEPlist.size(); i++)
+    SE->deleteValueFromRecords(GEPlist[i]);
+  GEPlist.clear();  
 
   // Clean up after ourselves
   if (!DeadInsts.empty()) {





More information about the llvm-commits mailing list