[llvm-commits] CVS: llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp

Sun Dec 7 23:36:01 PST 2003

Changes in directory llvm/lib/Transforms/Scalar:

TailRecursionElimination.cpp updated: 1.8 -> 1.9

---
Log message:

Cleanup and restructure the code to make it easier to read and maintain.
The only functionality change is that we now implement:
  Regression/Transforms/TailCallElim/intervening-inst.ll

Which is really kinda pointless, because it means that trivially dead code
does not interfere with -tce, but trivially dead code probably wouldn't be
around anytime when this pass is run anyway.

The point of including this change it to support other more aggressive
transformations when we have the analysis capabilities to do so.



---
Diffs of the changes:  (+126 -53)

Index: llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
diff -u llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp:1.8 llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp:1.9

--- llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp:1.8	Fri Nov 21 10:52:01 2003
+++ llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp	Sun Dec  7 23:34:54 2003
@@ -7,22 +7,40 @@
 // 
 //===----------------------------------------------------------------------===//
 //
-// This file implements tail recursion elimination.
+// This file transforms calls of the current function (self recursion) followed
+// by a return instruction with a branch to the entry of the function, creating
+// a loop.  This pass also implements the following extensions to the basic
+// algorithm:
 //
-// Caveats: The algorithm implemented is trivially simple.  There are several
-// improvements that could be made:
+//  1. Trivial instructions between the call and return do not prevent the
+//     transformation from taking place, though currently the analysis cannot
+//     support moving any really useful instructions (only dead ones).
 //
-//  1. If the function has any alloca instructions, these instructions will not
-//     remain in the entry block of the function.  Doing this requires analysis
-//     to prove that the alloca is not reachable by the recursively invoked
-//     function call.
+// There are several improvements that could be made:
+//
+//  1. If the function has any alloca instructions, these instructions will be
+//     moved out of the entry block of the function, causing them to be
+//     evaluated each time through the tail recursion.  Safely keeping allocas
+//     in the entry block requires analysis to proves that the tail-called
+//     function does not read or write the stack object.
 //  2. Tail recursion is only performed if the call immediately preceeds the
-//     return instruction.  Would it be useful to generalize this somehow?
+//     return instruction.  It's possible that there could be a jump between
+//     the call and the return.
 //  3. TRE is only performed if the function returns void or if the return
 //     returns the result returned by the call.  It is possible, but unlikely,
 //     that the return returns something else (like constant 0), and can still
 //     be TRE'd.  It can be TRE'd if ALL OTHER return instructions in the
 //     function return the exact same value.
+//  4. There can be intervening operations between the call and the return that
+//     prevent the TRE from occurring.  For example, there could be GEP's and
+//     stores to memory that will not be read or written by the call.  This
+//     requires some substantial analysis (such as with DSA) to prove safe to
+//     move ahead of the call, but doing so could allow many more TREs to be
+//     performed, for example in TreeAdd/TreeAlloc from the treeadd benchmark.
+//  5. This pass could transform functions that are prevented from being tail
+//     recursive by a commutative expression to use an accumulator helper
+//     function, thus compiling the typical naive factorial or 'fib'
+//     implementation into efficient code.
 //
 //===----------------------------------------------------------------------===//
 
@@ -32,7 +50,6 @@
 #include "llvm/Instructions.h"
 #include "llvm/Pass.h"
 #include "Support/Statistic.h"
-
 using namespace llvm;
 
 namespace {
@@ -40,6 +57,11 @@
 
   struct TailCallElim : public FunctionPass {
     virtual bool runOnFunction(Function &F);
+
+  private:
+    bool ProcessReturningBlock(ReturnInst *RI, BasicBlock *&OldEntry,
+                               std::vector<PHINode*> &ArgumentPHIs);
+    bool CanMoveAboveCall(Instruction *I, CallInst *CI);
   };
   RegisterOpt<TailCallElim> X("tailcallelim", "Tail Call Elimination");
 }
@@ -62,50 +84,101 @@
   // Loop over the function, looking for any returning blocks...
   for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
     if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator()))
-      if (Ret != BB->begin())  // Make sure there is something before the ret...
-        if (CallInst *CI = dyn_cast<CallInst>(Ret->getPrev()))
-          // Make sure the tail call is to the current function, and that the
-          // return either returns void or returns the value computed by the
-          // call.
-          if (CI->getCalledFunction() == &F &&
-              (Ret->getNumOperands() == 0 || Ret->getReturnValue() == CI)) {
-            // Ohh, it looks like we found a tail call, is this the first?
-            if (!OldEntry) {
-              // Ok, so this is the first tail call we have found in this
-              // function.  Insert a new entry block into the function, allowing
-              // us to branch back to the old entry block.
-              OldEntry = &F.getEntryBlock();
-              BasicBlock *NewEntry = new BasicBlock("tailrecurse", OldEntry);
-              new BranchInst(OldEntry, NewEntry);
-              
-              // Now that we have created a new block, which jumps to the entry
-              // block, insert a PHI node for each argument of the function.
-              // For now, we initialize each PHI to only have the real arguments
-              // which are passed in.
-              Instruction *InsertPos = OldEntry->begin();
-              for (Function::aiterator I = F.abegin(), E = F.aend(); I!=E; ++I){
-                PHINode *PN = new PHINode(I->getType(), I->getName()+".tr",
-                                          InsertPos);
-                I->replaceAllUsesWith(PN); // Everyone use the PHI node now!
-                PN->addIncoming(I, NewEntry);
-                ArgumentPHIs.push_back(PN);
-              }
-            }
-            
-            // Ok, now that we know we have a pseudo-entry block WITH all of the
-            // required PHI nodes, add entries into the PHI node for the actual
-            // parameters passed into the tail-recursive call.
-            for (unsigned i = 0, e = CI->getNumOperands()-1; i != e; ++i)
-              ArgumentPHIs[i]->addIncoming(CI->getOperand(i+1), BB);
-
-            // Now that all of the PHI nodes are in place, remove the call and
-            // ret instructions, replacing them with an unconditional branch.
-            new BranchInst(OldEntry, CI);
-            BB->getInstList().pop_back();  // Remove return.
-            BB->getInstList().pop_back();  // Remove call.
-            MadeChange = true;
-            NumEliminated++;
-          }
+      MadeChange |= ProcessReturningBlock(Ret, OldEntry, ArgumentPHIs);
   
   return MadeChange;
+}
+
+
+// CanMoveAboveCall - Return true if it is safe to move the specified
+// instruction from after the call to before the call, assuming that all
+// instructions between the call and this instruction are movable.
+//
+bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
+  // FIXME: We can move load/store/call/free instructions above the call if the
+  // call does not mod/ref the memory location being processed.
+  if (I->mayWriteToMemory() || isa<LoadInst>(I))
+    return false;
+
+  // Otherwise, if this is a side-effect free instruction, check to make sure
+  // that it does not use the return value of the call.  If it doesn't use the
+  // return value of the call, it must only use things that are defined before
+  // the call, or movable instructions between the call and the instruction
+  // itself.
+  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+    if (I->getOperand(i) == CI)
+      return false;
+  return true;
+}
+
+
+bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
+                                         std::vector<PHINode*> &ArgumentPHIs) {
+  BasicBlock *BB = Ret->getParent();
+  Function *F = BB->getParent();
+
+  if (&BB->front() == Ret) // Make sure there is something before the ret...
+    return false;
+
+  // Scan backwards from the return, checking to see if there is a tail call in
+  // this block.  If so, set CI to it.
+  CallInst *CI;
+  BasicBlock::iterator BBI = Ret;
+  while (1) {
+    CI = dyn_cast<CallInst>(BBI);
+    if (CI && CI->getCalledFunction() == F)
+      break;
+
+    if (BBI == BB->begin())
+      return false;          // Didn't find a potential tail call.
+    --BBI;
+  }
+
+  // Ok, we found a potential tail call.  We can currently only transform the
+  // tail call if all of the instructions between the call and the return are
+  // movable to above the call itself, leaving the call next to the return.
+  // Check that this is the case now.
+  for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI)
+    if (!CanMoveAboveCall(BBI, CI))
+      return false;   // Cannot move this instruction out of the way.
+
+  // We can only transform call/return pairs that either ignore the return value
+  // of the call and return void, or return the value returned by the tail call.
+  if (Ret->getNumOperands() != 0 && Ret->getReturnValue() != CI)
+    return false;
+
+  // OK! We can transform this tail call.  If this is the first one found,
+  // create the new entry block, allowing us to branch back to the old entry.
+  if (OldEntry == 0) {
+    OldEntry = &F->getEntryBlock();
+    std::string OldName = OldEntry->getName(); OldEntry->setName("tailrecurse");
+    BasicBlock *NewEntry = new BasicBlock(OldName, OldEntry);
+    new BranchInst(OldEntry, NewEntry);
+    
+    // Now that we have created a new block, which jumps to the entry
+    // block, insert a PHI node for each argument of the function.
+    // For now, we initialize each PHI to only have the real arguments
+    // which are passed in.
+    Instruction *InsertPos = OldEntry->begin();
+    for (Function::aiterator I = F->abegin(), E = F->aend(); I != E; ++I) {
+      PHINode *PN = new PHINode(I->getType(), I->getName()+".tr", InsertPos);
+      I->replaceAllUsesWith(PN); // Everyone use the PHI node now!
+      PN->addIncoming(I, NewEntry);
+      ArgumentPHIs.push_back(PN);
+    }
+  }
+  
+  // Ok, now that we know we have a pseudo-entry block WITH all of the
+  // required PHI nodes, add entries into the PHI node for the actual
+  // parameters passed into the tail-recursive call.
+  for (unsigned i = 0, e = CI->getNumOperands()-1; i != e; ++i)
+    ArgumentPHIs[i]->addIncoming(CI->getOperand(i+1), BB);
+  
+  // Now that all of the PHI nodes are in place, remove the call and
+  // ret instructions, replacing them with an unconditional branch.
+  new BranchInst(OldEntry, Ret);
+  BB->getInstList().erase(Ret);  // Remove return.
+  BB->getInstList().erase(CI);   // Remove call.
+  NumEliminated++;
+  return true;
 }