[llvm-commits] CVS: llvm-poolalloc/lib/PoolAllocate/AccessTrace.cpp Heuristic.cpp Heuristic.h Makefile PointerCompress.cpp PoolAllocate.cpp PoolAllocate.h PoolOptimize.cpp TransformFunctionBody.cpp
John Criswell
criswell at cs.uiuc.edu
Wed May 18 12:56:44 PDT 2005
Changes in directory llvm-poolalloc/lib/PoolAllocate:
AccessTrace.cpp updated: 1.2 -> 1.3
Heuristic.cpp updated: 1.12 -> 1.13
Heuristic.h updated: 1.5 -> 1.6
Makefile updated: 1.6 -> 1.7
PointerCompress.cpp updated: 1.61 -> 1.62
PoolAllocate.cpp updated: 1.119 -> 1.120
PoolAllocate.h updated: 1.46 -> 1.47
PoolOptimize.cpp updated: 1.4 -> 1.5
TransformFunctionBody.cpp updated: 1.45 -> 1.46
---
Log message:
Bring all of these files back from the "release_15" merge.
---
Diffs of the changes: (+4186 -0)
AccessTrace.cpp | 129 ++++
Heuristic.cpp | 509 ++++++++++++++++
Heuristic.h | 107 +++
Makefile | 18
PointerCompress.cpp | 1458 ++++++++++++++++++++++++++++++++++++++++++++++
PoolAllocate.cpp | 870 +++++++++++++++++++++++++++
PoolAllocate.h | 262 ++++++++
PoolOptimize.cpp | 240 +++++++
TransformFunctionBody.cpp | 593 ++++++++++++++++++
9 files changed, 4186 insertions(+)
Index: llvm-poolalloc/lib/PoolAllocate/AccessTrace.cpp
diff -u /dev/null llvm-poolalloc/lib/PoolAllocate/AccessTrace.cpp:1.3
--- /dev/null Wed May 18 14:56:38 2005
+++ llvm-poolalloc/lib/PoolAllocate/AccessTrace.cpp Wed May 18 14:56:28 2005
@@ -0,0 +1,129 @@
+//===-- PoolAccessTrace.cpp - Build trace of loads ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the -poolaccesstrace pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pointercompress"
+#include "PoolAllocate.h"
+#include "llvm/Analysis/DataStructure/DataStructure.h"
+#include "llvm/Analysis/DataStructure/DSGraph.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+using namespace llvm;
+
+namespace {
+
+  /// PoolAccessTrace - This transformation adds instrumentation to the program
+  /// to print a trace of pairs containing the address of each load and the
+  /// pool descriptor it was loaded from.
+ class PoolAccessTrace : public ModulePass {
+ PoolAllocate *PoolAlloc;
+ EquivClassGraphs *ECG;
+ Function *AccessTraceInitFn, *PoolAccessTraceFn;
+ const Type *VoidPtrTy;
+ public:
+
+ bool runOnModule(Module &M);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ const DSGraph &getGraphForFunc(PA::FuncInfo *FI) const {
+ return ECG->getDSGraph(FI->F);
+ }
+
+ private:
+ void InitializeLibraryFunctions(Module &M);
+ void InstrumentAccess(Instruction *I, Value *Ptr,
+ PA::FuncInfo *FI, DSGraph &DSG);
+ };
+
+ RegisterOpt<PoolAccessTrace>
+ X("poolaccesstrace", "Instrument program to print trace of accesses");
+}
+
+void PoolAccessTrace::getAnalysisUsage(AnalysisUsage &AU) const {
+ // Need information about how pool allocation happened.
+ AU.addRequired<PoolAllocatePassAllPools>();
+
+ // Need information from DSA.
+ AU.addRequired<EquivClassGraphs>();
+}
+
+void PoolAccessTrace::InitializeLibraryFunctions(Module &M) {
+ VoidPtrTy = PointerType::get(Type::SByteTy);
+
+ AccessTraceInitFn = M.getOrInsertFunction("poolaccesstraceinit",
+ Type::VoidTy,0);
+ PoolAccessTraceFn = M.getOrInsertFunction("poolaccesstrace", Type::VoidTy,
+ VoidPtrTy, VoidPtrTy, 0);
+}
+
+void PoolAccessTrace::InstrumentAccess(Instruction *I, Value *Ptr,
+ PA::FuncInfo *FI, DSGraph &DSG) {
+ // Don't trace loads of globals or the stack.
+ if (isa<Constant>(Ptr) || isa<AllocaInst>(Ptr)) return;
+
+ Value *MappedPtr = Ptr;
+ if (!FI->NewToOldValueMap.empty())
+ if ((MappedPtr = FI->MapValueToOriginal(MappedPtr)) == 0) {
+ // Value didn't exist in the orig program (pool desc?).
+ return;
+ }
+ DSNode *Node = DSG.getNodeForValue(MappedPtr).getNode();
+ if (Node == 0) return;
+
+ Value *PD = FI->PoolDescriptors[Node];
+ Ptr = new CastInst(Ptr, VoidPtrTy, Ptr->getName(), I);
+
+ if (PD)
+ PD = new CastInst(PD, VoidPtrTy, PD->getName(), I);
+ else
+ PD = Constant::getNullValue(VoidPtrTy);
+
+ // Insert the trace call.
+ new CallInst(PoolAccessTraceFn, Ptr, PD, "", I);
+}
+
+bool PoolAccessTrace::runOnModule(Module &M) {
+ PoolAlloc = &getAnalysis<PoolAllocatePassAllPools>();
+ ECG = &getAnalysis<EquivClassGraphs>();
+
+ // Create the function prototypes for runtime library.
+ InitializeLibraryFunctions(M);
+
+ Function *MainFunc = M.getMainFunction();
+ if (MainFunc && !MainFunc->isExternal())
+ // Insert a call to the library init function into the beginning of main.
+ new CallInst(AccessTraceInitFn, "", MainFunc->begin()->begin());
+
+ // Look at all of the loads in the program.
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isExternal()) continue;
+
+ PA::FuncInfo *FI = PoolAlloc->getFuncInfoOrClone(*F);
+ assert(FI && "DIDN'T FIND POOL INFO!");
+
+ // If this function was cloned, and this is the original function, ignore it
+ // (it's dead). We'll deal with the cloned version later when we run into
+ // it again.
+ if (FI->Clone && &FI->F == F)
+ continue;
+
+ // Get the DSGraph for this function.
+ DSGraph &DSG = ECG->getDSGraph(FI->F);
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ InstrumentAccess(LI, LI->getOperand(0), FI, DSG);
+ }
+ return true;
+}
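
The pass above only declares the two runtime entry points (poolaccesstraceinit and
poolaccesstrace); their definitions are expected to come from the pool allocator
runtime library. As a rough sketch of the runtime side, assuming a simple stderr
trace format (the names and signatures below match the prototypes created in
InitializeLibraryFunctions; the output format is only an illustration, not the
actual runtime):

  // Sketch of a possible runtime counterpart -- not part of this patch.
  #include <cstdio>

  extern "C" void poolaccesstraceinit() {
    // Called once at the top of main() by the instrumented program.
    std::fprintf(stderr, "=== pool access trace ===\n");
  }

  extern "C" void poolaccesstrace(void *Addr, void *PoolDesc) {
    // Called before each traced load: the address being loaded from and the
    // pool descriptor it belongs to (null when no pool was assigned).
    std::fprintf(stderr, "load %p pool %p\n", Addr, PoolDesc);
  }
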
Index: llvm-poolalloc/lib/PoolAllocate/Heuristic.cpp
diff -u /dev/null llvm-poolalloc/lib/PoolAllocate/Heuristic.cpp:1.13
--- /dev/null Wed May 18 14:56:44 2005
+++ llvm-poolalloc/lib/PoolAllocate/Heuristic.cpp Wed May 18 14:56:28 2005
@@ -0,0 +1,509 @@
+//===-- Heuristic.cpp - Interface to PA heuristics ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the various pool allocation heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Heuristic.h"
+#include "PoolAllocate.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/DataStructure/DSGraphTraits.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetData.h"
+#include <iostream>
+using namespace llvm;
+using namespace PA;
+
+namespace {
+ enum PoolAllocHeuristic {
+ NoNodes,
+ OnlyOverhead,
+ AllInOneGlobalPool,
+ SmartCoallesceNodes,
+ CyclicNodes,
+ AllButUnreachableFromMemory,
+ AllNodes,
+ };
+ cl::opt<PoolAllocHeuristic>
+ TheHeuristic("poolalloc-heuristic",
+ cl::desc("Heuristic to choose which nodes to pool allocate"),
+ cl::values(clEnumVal(AllNodes, " Pool allocate all nodes"),
+ clEnumVal(AllButUnreachableFromMemory, " Pool allocate all reachable from memory objects"),
+ clEnumVal(CyclicNodes, " Pool allocate nodes with cycles"),
+ clEnumVal(SmartCoallesceNodes, " Use the smart node merging heuristic"),
+ clEnumVal(AllInOneGlobalPool, " Use pool library as replacement for malloc/free"),
+ clEnumVal(OnlyOverhead, " Do not pool allocate anything, but induce all overhead from it"),
+ clEnumVal(NoNodes, " Do not pool allocate anything"),
+ clEnumValEnd),
+ cl::init(AllButUnreachableFromMemory));
+
+ cl::opt<bool>
+ DisableAlignOpt("poolalloc-disable-alignopt",
+ cl::desc("Force all pool alignment to 8 bytes"));
+}
+
+Heuristic::~Heuristic() {}
+
+unsigned Heuristic::getRecommendedSize(const DSNode *N) {
+ unsigned PoolSize = 0;
+ if (!N->isArray() && N->getType()->isSized()) {
+ PoolSize = N->getParentGraph()->getTargetData().getTypeSize(N->getType());
+ }
+ if (PoolSize == 1) PoolSize = 0;
+ return PoolSize;
+}
+
+/// Wants8ByteAlignment - FIXME: this is a complete hack for X86 right now.
+static bool Wants8ByteAlignment(const Type *Ty, unsigned Offs,
+ const TargetData &TD) {
+ if (DisableAlignOpt) return true;
+
+ if ((Offs & 7) == 0) {
+ // Doubles always want to be 8-byte aligned.
+ if (Ty == Type::DoubleTy) return true;
+
+ // If we are on a 64-bit system, we want to align 8-byte integers and
+ // pointers.
+ if (TD.getTypeAlignment(Ty) == 8)
+ return true;
+ }
+
+ if (Ty->isPrimitiveType() || isa<PointerType>(Ty))
+ return false;
+
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ const StructLayout *SL = TD.getStructLayout(STy);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ if (Wants8ByteAlignment(STy->getElementType(i),
+ Offs+SL->MemberOffsets[i], TD))
+ return true;
+ }
+ } else if (const SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
+ return Wants8ByteAlignment(STy->getElementType(), Offs, TD);
+ } else {
+ std::cerr << *Ty << "\n";
+ assert(0 && "Unknown type!");
+ }
+ return false;
+}
+
+unsigned Heuristic::getRecommendedAlignment(const Type *Ty,
+ const TargetData &TD) {
+ if (Ty == Type::VoidTy) // Is this void or collapsed?
+ return 0; // No known alignment, let runtime decide.
+
+ return Wants8ByteAlignment(Ty, 0, TD) ? 8 : 4;
+}
+
+/// getRecommendedAlignment - Return the recommended object alignment for this
+/// DSNode.
+///
+unsigned Heuristic::getRecommendedAlignment(const DSNode *N) {
+ if (N->getType() == Type::VoidTy) // Is this void or collapsed?
+ return 0; // No known alignment, let runtime decide.
+
+ const TargetData &TD = N->getParentGraph()->getTargetData();
+
+ // If there are no doubles on an 8-byte boundary in this structure, there is
+ // no reason to 8-byte align objects in the pool.
+ return Wants8ByteAlignment(N->getType(), 0, TD) ? 8 : 4;
+}
+
+
+//===-- AllNodes Heuristic ------------------------------------------------===//
+//
+// This heuristic pool allocates everything possible into separate pools.
+//
+struct AllNodesHeuristic : public Heuristic {
+
+ void AssignToPools(const std::vector<const DSNode*> &NodesToPA,
+ Function *F, DSGraph &G,
+ std::vector<OnePool> &ResultPools) {
+ for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i)
+ ResultPools.push_back(OnePool(NodesToPA[i]));
+ }
+};
+
+
+//===-- AllButUnreachableFromMemoryHeuristic Heuristic --------------------===//
+//
+// This heuristic pool allocates everything possible into separate pools, unless
+// the object is not reachable from any other memory object. This filters out
+// objects that are not cyclic and are only pointed to by scalars: these tend
+// to be single memory allocations that are not worth creating a whole pool for.
+//
+struct AllButUnreachableFromMemoryHeuristic : public Heuristic {
+
+ void AssignToPools(const std::vector<const DSNode*> &NodesToPA,
+ Function *F, DSGraph &G,
+ std::vector<OnePool> &ResultPools) {
+ // Build a set of all nodes that are reachable from another node in the
+ // graph. Here we ignore scalar nodes that are only globals as they are
+ // often global pointers to big arrays.
+ std::set<const DSNode*> ReachableFromMemory;
+ for (DSGraph::node_iterator I = G.node_begin(), E = G.node_end();
+ I != E; ++I) {
+ DSNode *N = I;
+ // Ignore nodes that are just globals and not arrays.
+ /*
+ if (N->isArray() || N->isHeapNode() || N->isAllocaNode() ||
+ N->isUnknownNode())
+ */
+ // If a node is marked, all children are too.
+ if (!ReachableFromMemory.count(N))
+ for (DSNode::iterator NI = N->begin(), E = N->end(); NI != E; ++NI)
+ for (df_ext_iterator<const DSNode*>
+ DI = df_ext_begin(*NI, ReachableFromMemory),
+ E = df_ext_end(*NI, ReachableFromMemory); DI != E; ++DI)
+ /*empty*/;
+ }
+
+ // Only pool allocate a node if it is reachable from a memory object (itself
+ // included).
+ for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i)
+ if (ReachableFromMemory.count(NodesToPA[i]))
+ ResultPools.push_back(OnePool(NodesToPA[i]));
+ }
+};
+
+//===-- CyclicNodes Heuristic ---------------------------------------------===//
+//
+// This heuristic only pool allocates nodes in an SCC in the DSGraph.
+//
+struct CyclicNodesHeuristic : public Heuristic {
+
+ void AssignToPools(const std::vector<const DSNode*> &NodesToPA,
+ Function *F, DSGraph &G,
+ std::vector<OnePool> &ResultPools);
+};
+
+static bool NodeExistsInCycle(const DSNode *N) {
+ for (DSNode::const_iterator I = N->begin(), E = N->end(); I != E; ++I)
+ if (*I && std::find(df_begin(*I), df_end(*I), N) != df_end(*I))
+ return true;
+ return false;
+}
+
+void CyclicNodesHeuristic::AssignToPools(const std::vector<const
+ DSNode*> &NodesToPA,
+ Function *F, DSGraph &G,
+ std::vector<OnePool> &ResultPools) {
+ for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i)
+ if (NodeExistsInCycle(NodesToPA[i]))
+ ResultPools.push_back(OnePool(NodesToPA[i]));
+}
+
+
+//===-- SmartCoallesceNodes Heuristic -------------------------------------===//
+//
+// This heuristic attempts to be smart and coalesce nodes at times. In
+// practice, it doesn't work very well.
+//
+struct SmartCoallesceNodesHeuristic : public Heuristic {
+
+ void AssignToPools(const std::vector<const DSNode*> &NodesToPA,
+ Function *F, DSGraph &G,
+ std::vector<OnePool> &ResultPools) {
+ // For globals, do not pool allocate unless the node is cyclic and not an
+ // array (unless it's collapsed).
+ if (F == 0) {
+ for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i) {
+ const DSNode *Node = NodesToPA[i];
+ if ((Node->isNodeCompletelyFolded() || !Node->isArray()) &&
+ NodeExistsInCycle(Node))
+ ResultPools.push_back(OnePool(Node));
+ }
+ } else {
+ // TODO
+ }
+ }
+};
+
+#if 0
+/// NodeIsSelfRecursive - Return true if this node contains a pointer to itself.
+static bool NodeIsSelfRecursive(DSNode *N) {
+ for (DSNode::iterator I = N->begin(), E = N->end(); I != E; ++I)
+ if (*I == N) return true;
+ return false;
+}
+
+/// POVisit - This implements functionality found in Support/PostOrderIterator.h
+/// but in a way that allows multiple roots to be used. If PostOrderIterator
+/// supported an external set like DepthFirstIterator did I could eliminate this
+/// cruft.
+///
+static void POVisit(DSNode *N, std::set<DSNode*> &Visited,
+ std::vector<DSNode*> &Order) {
+ if (!Visited.insert(N).second) return; // already visited
+
+ // Visit all children before visiting this node.
+ for (DSNode::iterator I = N->begin(), E = N->end(); I != E; ++I)
+ if (DSNode *C = const_cast<DSNode*>(*I))
+ POVisit(C, Visited, Order);
+ // Now that we visited all of our children, add ourself to the order.
+ Order.push_back(N);
+}
+
+
+
+ // Heuristic for building per-function pools
+
+ switch (Heuristic) {
+ case SmartCoallesceNodes: {
+ std::set<DSNode*> NodesToPASet(NodesToPA.begin(), NodesToPA.end());
+
+ // DSGraphs only have unidirectional edges, to traverse or inspect the
+ // predecessors of nodes, we must build a mapping of the inverse graph.
+ std::map<DSNode*, std::vector<DSNode*> > InverseGraph;
+
+ for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i) {
+ DSNode *Node = NodesToPA[i];
+ for (DSNode::iterator CI = Node->begin(), E = Node->end(); CI != E; ++CI)
+ if (DSNode *Child = const_cast<DSNode*>(*CI))
+ if (NodesToPASet.count(Child))
+ InverseGraph[Child].push_back(Node);
+ }
+
+ // Traverse the heap nodes in reverse-post-order so that we are guaranteed
+ // to visit all nodes pointing to another node before we visit that node
+ // itself (except with cycles).
+
+ // FIXME: This really should be using the PostOrderIterator.h file stuff,
+ // but the routines there do not support external storage!
+ std::set<DSNode*> Visited;
+ std::vector<DSNode*> Order;
+ for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i)
+ POVisit(NodesToPA[i], Visited, Order);
+
+ // We want RPO, not PO, so reverse the order.
+ std::reverse(Order.begin(), Order.end());
+
+ // Okay, we have an ordering of the nodes in reverse post order. Traverse
+ // each node in this ordering, noting that there may be nodes in the order
+ // that are not in our NodesToPA list.
+ for (unsigned i = 0, e = Order.size(); i != e; ++i)
+ if (NodesToPASet.count(Order[i])) { // Only process pa nodes.
+ DSNode *N = Order[i];
+
+ // If this node has a backedge to itself, pool allocate it in a new
+ // pool.
+ if (NodeIsSelfRecursive(N)) {
+ // Create a new alloca instruction for the pool...
+ Value *AI = new AllocaInst(PoolDescType, 0, "PD", InsertPoint);
+
+ // Void types in DS graph are never used
+ if (N->isNodeCompletelyFolded())
+ std::cerr << "Node collapsing in '" << F.getName() << "'\n";
+
+ // Update the PoolDescriptors map
+ PoolDescriptors.insert(std::make_pair(N, AI));
+#if 1
+ } else if (N->isArray() && !N->isNodeCompletelyFolded()) {
+ // We never pool allocate array nodes.
+ PoolDescriptors[N] =
+ Constant::getNullValue(PointerType::get(PoolDescType));
+ ++NumNonprofit;
+#endif
+ } else {
+ // Otherwise the node is not self recursive. If the node is not an
+ // array, we can co-locate it with the pool of a predecessor node if
+ // any has been pool allocated, and start a new pool if a predecessor
+ // is an array. If there is a predecessor of this node that has not
+ // been visited yet in this RPO traversal, that means there is a
+ // cycle, so we choose to pool allocate this node right away.
+ //
+          // If there are multiple predecessors in multiple different pools, we
+          // don't pool allocate this node at all.
+
+ // Check out each of the predecessors of this node.
+ std::vector<DSNode*> &Preds = InverseGraph[N];
+ Value *PredPool = 0;
+ bool HasUnvisitedPred = false;
+ bool HasArrayPred = false;
+ bool HasMultiplePredPools = false;
+ for (unsigned p = 0, e = Preds.size(); p != e; ++p) {
+ DSNode *Pred = Preds[p];
+ if (!PoolDescriptors.count(Pred))
+ HasUnvisitedPred = true; // no pool assigned to predecessor?
+ else if (Pred->isArray() && !Pred->isNodeCompletelyFolded())
+ HasArrayPred = true;
+ else if (PredPool && PoolDescriptors[Pred] != PredPool)
+ HasMultiplePredPools = true;
+ else if (!PredPool &&
+ !isa<ConstantPointerNull>(PoolDescriptors[Pred]))
+ PredPool = PoolDescriptors[Pred];
+ // Otherwise, this predecessor has the same pool as a previous one.
+ }
+
+ if (HasMultiplePredPools) {
+ // If this node has predecessors that are in different pools, don't
+ // pool allocate this node.
+ PoolDescriptors[N] =
+ Constant::getNullValue(PointerType::get(PoolDescType));
+ ++NumNonprofit;
+ } else if (PredPool) {
+ // If all of the predecessors of this node are already in a pool,
+ // colocate.
+ PoolDescriptors[N] = PredPool;
+ ++NumColocated;
+ } else if (HasArrayPred || HasUnvisitedPred) {
+ // If this node has an array predecessor, or if there is a
+ // predecessor that has not been visited yet, allocate a new pool
+ // for it.
+ Value *AI = new AllocaInst(PoolDescType, 0, "PD", InsertPoint);
+ if (N->isNodeCompletelyFolded())
+ std::cerr << "Node collapsing in '" << F.getName() << "'\n";
+
+ PoolDescriptors[N] = AI;
+ } else {
+ // If this node has no pool allocated predecessors, and there is no
+ // reason to pool allocate it, don't.
+ assert(PredPool == 0);
+ PoolDescriptors[N] =
+ Constant::getNullValue(PointerType::get(PoolDescType));
+ ++NumNonprofit;
+ }
+ }
+ }
+ } // End switch case
+ } // end switch
+#endif
+
+
+//===-- AllInOneGlobalPool Heuristic --------------------------------------===//
+//
+// This heuristic puts all memory in the whole program into a single global
+// pool. This is not safe, and is not good for performance, but can be used to
+// evaluate how well the pool allocator runtime works as a "malloc replacement".
+//
+struct AllInOneGlobalPoolHeuristic : public Heuristic {
+ // TheGlobalPD - This global pool is the one and only one used when running
+ // with Heuristic=AllInOneGlobalPool.
+ GlobalVariable *TheGlobalPD;
+
+ AllInOneGlobalPoolHeuristic() : TheGlobalPD(0) {}
+
+
+ virtual bool IsRealHeuristic() { return false; }
+
+ void AssignToPools(const std::vector<const DSNode*> &NodesToPA,
+ Function *F, DSGraph &G,
+ std::vector<OnePool> &ResultPools) {
+ if (TheGlobalPD == 0)
+ TheGlobalPD = PA->CreateGlobalPool(0, 0);
+
+ // All nodes allocate from the same global pool.
+ OnePool Pool;
+ Pool.NodesInPool = NodesToPA;
+ Pool.PoolDesc = TheGlobalPD;
+ ResultPools.push_back(Pool);
+ }
+};
+
+//===-- OnlyOverhead Heuristic --------------------------------------------===//
+//
+// This heuristic is a hack to evaluate how much overhead pool allocation adds
+// to a program. It adds all of the arguments, poolinits and pool destroys to
+// the program, but dynamically only passes null into the pool alloc/free
+// functions, causing them to allocate from the heap.
+//
+struct OnlyOverheadHeuristic : public Heuristic {
+ virtual bool IsRealHeuristic() { return false; }
+
+ void AssignToPools(const std::vector<const DSNode*> &NodesToPA,
+ Function *F, DSGraph &G,
+ std::vector<OnePool> &ResultPools) {
+ // For this heuristic, we assign everything possible to its own pool.
+ for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i)
+ ResultPools.push_back(OnePool(NodesToPA[i]));
+ }
+
+ void HackFunctionBody(Function &F, std::map<const DSNode*, Value*> &PDs);
+};
+
+/// getDynamicallyNullPool - Return a PoolDescriptor* that is always dynamically
+/// null. Insert the code necessary to produce it before the specified
+/// instruction.
+static Value *getDynamicallyNullPool(BasicBlock::iterator I) {
+ // Arrange to dynamically pass null into all of the pool functions if we are
+ // only checking for overhead.
+ static Value *NullGlobal = 0;
+ if (!NullGlobal) {
+ Module *M = I->getParent()->getParent()->getParent();
+ NullGlobal = new GlobalVariable(PoolAllocate::PoolDescPtrTy, false,
+ GlobalValue::ExternalLinkage,
+ Constant::getNullValue(PoolAllocate::PoolDescPtrTy),
+ "llvm-poolalloc-null-init", M);
+ }
+ while (isa<AllocaInst>(I)) ++I;
+
+ return new LoadInst(NullGlobal, "nullpd", I);
+}
+
+// HackFunctionBody - This method is called on every transformed function body.
+// Basically it replaces all uses of real pool descriptors with dynamically null
+// values. However, it leaves pool init/destroy alone.
+void OnlyOverheadHeuristic::HackFunctionBody(Function &F,
+ std::map<const DSNode*,
+ Value*> &PDs) {
+ Function *PoolInit = PA->PoolInit;
+ Function *PoolDestroy = PA->PoolDestroy;
+
+ Value *NullPD = getDynamicallyNullPool(F.front().begin());
+ for (std::map<const DSNode*, Value*>::iterator PDI = PDs.begin(),
+ E = PDs.end(); PDI != E; ++PDI) {
+ Value *OldPD = PDI->second;
+ std::vector<User*> OldPDUsers(OldPD->use_begin(), OldPD->use_end());
+ for (unsigned i = 0, e = OldPDUsers.size(); i != e; ++i) {
+ CallSite PDUser = CallSite::get(cast<Instruction>(OldPDUsers[i]));
+ if (PDUser.getCalledValue() != PoolInit &&
+ PDUser.getCalledValue() != PoolDestroy) {
+ assert(PDUser.getInstruction()->getParent()->getParent() == &F &&
+ "Not in cur fn??");
+ PDUser.getInstruction()->replaceUsesOfWith(OldPD, NullPD);
+ }
+ }
+ }
+}
+
+
+//===-- NoNodes Heuristic -------------------------------------------------===//
+//
+// This dummy heuristic chooses to not pool allocate anything.
+//
+struct NoNodesHeuristic : public Heuristic {
+ virtual bool IsRealHeuristic() { return false; }
+
+ void AssignToPools(const std::vector<const DSNode*> &NodesToPA,
+ Function *F, DSGraph &G,
+ std::vector<OnePool> &ResultPools) {
+ // Nothing to pool allocate here.
+ }
+};
+
+//===----------------------------------------------------------------------===//
+// Heuristic dispatch support
+//
+
+PA::Heuristic *Heuristic::create() {
+ switch (TheHeuristic) {
+ default: assert(0 && "Unknown heuristic!");
+ case AllNodes: return new AllNodesHeuristic();
+ case AllButUnreachableFromMemory:
+ return new AllButUnreachableFromMemoryHeuristic();
+ case CyclicNodes: return new CyclicNodesHeuristic();
+ case SmartCoallesceNodes: return new SmartCoallesceNodesHeuristic();
+ case AllInOneGlobalPool: return new AllInOneGlobalPoolHeuristic();
+ case OnlyOverhead: return new OnlyOverheadHeuristic();
+ case NoNodes: return new NoNodesHeuristic();
+ }
+}
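
All of the heuristics above follow the same pattern: subclass Heuristic, implement
AssignToPools, and add a case to Heuristic::create(). As a sketch only (the
ArrayNodes name and the wiring notes are hypothetical, not part of this patch), a
new heuristic that pool allocates only array nodes would look like this, assuming
the same context as Heuristic.cpp (using namespace llvm and PA):

  // Hypothetical example, not in this patch: pool allocate only array nodes.
  struct ArrayNodesHeuristic : public Heuristic {
    void AssignToPools(const std::vector<const DSNode*> &NodesToPA,
                       Function *F, DSGraph &G,
                       std::vector<OnePool> &ResultPools) {
      for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i)
        if (NodesToPA[i]->isArray())
          ResultPools.push_back(OnePool(NodesToPA[i]));
    }
  };

  // Wiring it in would mean adding an ArrayNodes value to the
  // PoolAllocHeuristic enum, a clEnumVal entry to -poolalloc-heuristic, and
  // "case ArrayNodes: return new ArrayNodesHeuristic();" to Heuristic::create().
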
Index: llvm-poolalloc/lib/PoolAllocate/Heuristic.h
diff -u /dev/null llvm-poolalloc/lib/PoolAllocate/Heuristic.h:1.6
--- /dev/null Wed May 18 14:56:44 2005
+++ llvm-poolalloc/lib/PoolAllocate/Heuristic.h Wed May 18 14:56:28 2005
@@ -0,0 +1,107 @@
+//===-- Heuristic.h - Interface to PA heuristics ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header is the abstract interface used by the pool allocator to access
+// the various heuristics supported.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POOLALLOCATION_HEURISTIC_H
+#define POOLALLOCATION_HEURISTIC_H
+
+#include <vector>
+#include <map>
+
+namespace llvm {
+ class Value;
+ class Function;
+ class Module;
+ class DSGraph;
+ class DSNode;
+ class PoolAllocate;
+ class TargetData;
+ class Type;
+
+namespace PA {
+ class Heuristic {
+ protected:
+ Module *M;
+ DSGraph *GG;
+ PoolAllocate *PA;
+
+ Heuristic() {}
+ public:
+ void Initialize(Module &m, DSGraph &gg, PoolAllocate &pa) {
+ M = &m; GG = ≫ PA = &pa;
+ }
+ virtual ~Heuristic();
+
+ /// IsRealHeuristic - Return true if this is not a real pool allocation
+ /// heuristic.
+ virtual bool IsRealHeuristic() { return true; }
+
+    /// OnePool - This represents some number of nodes which are coalesced into
+ /// a pool.
+ struct OnePool {
+ // NodesInPool - The DS nodes to be allocated to this pool. There may be
+      // multiple here if they are being coalesced into the same pool.
+ std::vector<const DSNode*> NodesInPool;
+
+ // PoolDesc - If the heuristic wants the nodes allocated to a specific
+ // pool descriptor, it can specify it here, otherwise a new pool is
+ // created.
+ Value *PoolDesc;
+
+ // PoolSize - If the pool is to be created, indicate the "recommended
+ // size" for the pool here. This gets passed into poolinit.
+ unsigned PoolSize;
+ unsigned PoolAlignment;
+
+ OnePool() : PoolDesc(0), PoolSize(0), PoolAlignment(0) {}
+
+ OnePool(const DSNode *N) : PoolDesc(0), PoolSize(getRecommendedSize(N)),
+ PoolAlignment(getRecommendedAlignment(N)) {
+ NodesInPool.push_back(N);
+ }
+ OnePool(const DSNode *N, Value *PD) : PoolDesc(PD), PoolSize(0),
+ PoolAlignment(0) {
+ NodesInPool.push_back(N);
+ }
+ };
+
+ /// AssignToPools - Partition NodesToPA into a set of disjoint pools,
+ /// returning the result in ResultPools. If this is a function being pool
+ /// allocated, F will not be null.
+ virtual void AssignToPools(const std::vector<const DSNode*> &NodesToPA,
+ Function *F, DSGraph &G,
+ std::vector<OnePool> &ResultPools) = 0;
+
+ // Hacks for the OnlyOverhead heuristic.
+ virtual void HackFunctionBody(Function &F,
+ std::map<const DSNode*, Value*> &PDs) {}
+
+ /// getRecommendedSize - Return the recommended pool size for this DSNode.
+ ///
+ static unsigned getRecommendedSize(const DSNode *N);
+
+ /// getRecommendedAlignment - Return the recommended object alignment for
+ /// this DSNode.
+ ///
+ static unsigned getRecommendedAlignment(const DSNode *N);
+ static unsigned getRecommendedAlignment(const Type *Ty,
+ const TargetData &TD);
+
+ /// create - This static ctor creates the heuristic, based on the command
+ /// line argument to choose the heuristic.
+ static Heuristic *create();
+ };
+}
+}
+
+#endif
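
To put the interface in context, the pool allocator is expected to drive it
roughly as follows; this is a paraphrase of the header with made-up local names,
not code from this patch:

  // Sketch only: how a caller would use the Heuristic interface.
  void assignPoolsSketch(Module &M, DSGraph &GlobalsGraph, PoolAllocate &PAPass,
                         Function *F, DSGraph &G,
                         const std::vector<const DSNode*> &NodesToPA) {
    PA::Heuristic *H = PA::Heuristic::create(); // chosen by -poolalloc-heuristic
    H->Initialize(M, GlobalsGraph, PAPass);

    std::vector<PA::Heuristic::OnePool> Pools;
    H->AssignToPools(NodesToPA, F, G, Pools);   // F is null for global pools

    for (unsigned i = 0, e = Pools.size(); i != e; ++i) {
      // For each OnePool: if PoolDesc is set, reuse that descriptor for every
      // node in NodesInPool; otherwise create a new pool and pass PoolSize and
      // PoolAlignment to poolinit.
    }
    delete H;
  }
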
Index: llvm-poolalloc/lib/PoolAllocate/Makefile
diff -u /dev/null llvm-poolalloc/lib/PoolAllocate/Makefile:1.7
--- /dev/null Wed May 18 14:56:44 2005
+++ llvm-poolalloc/lib/PoolAllocate/Makefile Wed May 18 14:56:28 2005
@@ -0,0 +1,18 @@
+#
+# Indicate where we are relative to the top of the source tree.
+#
+LEVEL=../..
+
+#
+# Give the name of a library. This will build a dynamic version.
+#
+SHARED_LIBRARY=1
+LOADABLE_MODULE = 1
+DONT_BUILD_RELINKED=1
+LIBRARYNAME=poolalloc
+
+#
+# Include Makefile.common so we know what to do.
+#
+include $(LEVEL)/Makefile.common
+
Index: llvm-poolalloc/lib/PoolAllocate/PointerCompress.cpp
diff -u /dev/null llvm-poolalloc/lib/PoolAllocate/PointerCompress.cpp:1.62
--- /dev/null Wed May 18 14:56:44 2005
+++ llvm-poolalloc/lib/PoolAllocate/PointerCompress.cpp Wed May 18 14:56:28 2005
@@ -0,0 +1,1458 @@
+//===-- PointerCompress.cpp - Pointer Compression Pass --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the -pointercompress pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pointercompress"
+#include "PoolAllocate.h"
+#include "Heuristic.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/DataStructure/DataStructure.h"
+#include "llvm/Analysis/DataStructure/DSGraph.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/InstVisitor.h"
+
+#include "llvm/Transforms/Utils/Cloning.h"
+using namespace llvm;
+
+/// MEMUINTTYPE - This is the actual type we are compressing to. This is really
+/// only capable of being UIntTy, except when we are doing tests for 16-bit
+/// integers, when it's UShortTy.
+static const Type *MEMUINTTYPE;
+
+/// SCALARUINTTYPE - We keep scalars the same size as the machine word on the
+/// system (e.g. 64-bits), only keeping memory objects in MEMUINTTYPE.
+static const Type *SCALARUINTTYPE;
+
+namespace {
+ cl::opt<bool>
+ SmallIntCompress("compress-to-16-bits",
+ cl::desc("Pointer compress data structures to 16 bit "
+ "integers instead of 32-bit integers"));
+ cl::opt<bool>
+ DisablePoolBaseASR("disable-ptrcomp-poolbase-aggregation",
+ cl::desc("Don't optimize pool base loads"));
+
+ Statistic<> NumCompressed("pointercompress",
+ "Number of pools pointer compressed");
+ Statistic<> NumNotCompressed("pointercompress",
+ "Number of pools not compressible");
+ Statistic<> NumCloned ("pointercompress", "Number of functions cloned");
+
+ class CompressedPoolInfo;
+
+ /// FunctionCloneRecord - One of these is kept for each function that is
+ /// cloned.
+ struct FunctionCloneRecord {
+ /// PAFn - The pool allocated input function that we compressed.
+ ///
+ Function *PAFn;
+ FunctionCloneRecord(Function *pafn) : PAFn(pafn) {}
+
+ /// PoolDescriptors - The Value* which defines the pool descriptor for this
+ /// DSNode. Note: Does not necessarily include pool arguments that are
+ /// passed in because of indirect function calls that are not used in the
+ /// function.
+ std::map<const DSNode*, Value*> PoolDescriptors;
+
+ /// NewToOldValueMap - This is a mapping from the values in the cloned body
+ /// to the values in PAFn.
+ std::map<Value*, const Value*> NewToOldValueMap;
+
+ const Value *getValueInOriginalFunction(Value *V) const {
+ std::map<Value*, const Value*>::const_iterator I =
+ NewToOldValueMap.find(V);
+ if (I == NewToOldValueMap.end()) {
+ for (I = NewToOldValueMap.begin(); I != NewToOldValueMap.end(); ++I)
+ std::cerr << "MAP: " << *I->first << " TO: " << *I->second << "\n";
+ }
+ assert (I != NewToOldValueMap.end() && "Value did not come from clone!");
+ return I->second;
+ }
+ };
+
+ /// PointerCompress - This transformation hacks on type-safe pool allocated
+ /// data structures to reduce the size of pointers in the program.
+ class PointerCompress : public ModulePass {
+ PoolAllocate *PoolAlloc;
+ EquivClassGraphs *ECG;
+
+ /// ClonedFunctionMap - Every time we clone a function to compress its
+ /// arguments, keep track of the clone and which arguments are compressed.
+ typedef std::pair<Function*, std::set<const DSNode*> > CloneID;
+ std::map<CloneID, Function *> ClonedFunctionMap;
+
+ std::map<std::pair<Function*, std::vector<unsigned> >,
+ Function*> ExtCloneFunctionMap;
+
+ /// ClonedFunctionInfoMap - This identifies the pool allocated function that
+ /// a clone came from.
+ std::map<Function*, FunctionCloneRecord> ClonedFunctionInfoMap;
+
+ /// CompressedGlobalPools - Keep track of which DSNodes in the globals graph
+ /// are both pool allocated and should be compressed, and which GlobalValue
+ /// their pool descriptor is.
+ std::map<const DSNode*, GlobalValue*> CompressedGlobalPools;
+
+ public:
+ Function *PoolInitPC, *PoolDestroyPC, *PoolAllocPC;
+ typedef std::map<const DSNode*, CompressedPoolInfo> PoolInfoMap;
+
+ /// NoArgFunctionsCalled - When we are walking the call graph, keep track of
+ /// which functions are called that don't need their prototype to be
+ /// changed.
+ std::vector<Function*> NoArgFunctionsCalled;
+
+ bool runOnModule(Module &M);
+
+ void HandleGlobalPools(Module &M);
+
+
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ PoolAllocate *getPoolAlloc() const { return PoolAlloc; }
+
+ const DSGraph &getGraphForFunc(PA::FuncInfo *FI) const {
+ return ECG->getDSGraph(FI->F);
+ }
+
+ /// getCloneInfo - If the specified function is a clone, return the
+ /// information about the cloning process for it. Otherwise, return a null
+ /// pointer.
+ FunctionCloneRecord *getCloneInfo(Function &F) {
+ std::map<Function*, FunctionCloneRecord>::iterator I =
+ ClonedFunctionInfoMap.find(&F);
+ return I == ClonedFunctionInfoMap.end() ? 0 : &I->second;
+ }
+
+ Function *GetFunctionClone(Function *F,
+ std::set<const DSNode*> &PoolsToCompress,
+ PA::FuncInfo &FI, const DSGraph &CG);
+ Function *GetExtFunctionClone(Function *F,
+ const std::vector<unsigned> &Args);
+
+ private:
+ void InitializePoolLibraryFunctions(Module &M);
+ bool CompressPoolsInFunction(Function &F,
+ std::vector<std::pair<Value*, Value*> > *PremappedVals = 0,
+ std::set<const DSNode*> *ExternalPoolsToCompress = 0);
+
+ void FindPoolsToCompress(std::set<const DSNode*> &Pools,
+ std::map<const DSNode*, Value*> &PreassignedPools,
+ Function &F, DSGraph &DSG, PA::FuncInfo *FI);
+ };
+
+ RegisterOpt<PointerCompress>
+ X("pointercompress", "Compress type-safe data structures");
+}
+
+//===----------------------------------------------------------------------===//
+// CompressedPoolInfo Class and Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// CompressedPoolInfo - An instance of this structure is created for each
+ /// pool that is compressed.
+ class CompressedPoolInfo {
+ const DSNode *Pool;
+ Value *PoolDesc;
+ const Type *NewTy;
+ unsigned NewSize;
+ mutable Value *PoolBase;
+ public:
+ CompressedPoolInfo(const DSNode *N, Value *PD)
+ : Pool(N), PoolDesc(PD), NewTy(0), PoolBase(0) {}
+
+ /// Initialize - When we know all of the pools in a function that are going
+ /// to be compressed, initialize our state based on that data.
+ void Initialize(std::map<const DSNode*, CompressedPoolInfo> &Nodes,
+ const TargetData &TD);
+
+ const DSNode *getNode() const { return Pool; }
+ const Type *getNewType() const { return NewTy; }
+
+ /// getNewSize - Return the size of each node after compression.
+ ///
+ unsigned getNewSize() const { return NewSize; }
+
+ /// getPoolDesc - Return the Value* for the pool descriptor for this pool.
+ ///
+ Value *getPoolDesc() const { return PoolDesc; }
+
+ /// EmitPoolBaseLoad - Emit code to load the pool base value for this pool
+ /// before the specified instruction.
+ Value *EmitPoolBaseLoad(Instruction &I) const;
+ void setPoolBase(Value *PB) const { PoolBase = PB; }
+
+ // dump - Emit a debugging dump of this pool info.
+ void dump() const;
+
+ private:
+ const Type *ComputeCompressedType(const Type *OrigTy, unsigned NodeOffset,
+ std::map<const DSNode*, CompressedPoolInfo> &Nodes);
+ };
+}
+
+/// Initialize - When we know all of the pools in a function that are going
+/// to be compressed, initialize our state based on that data.
+void CompressedPoolInfo::Initialize(std::map<const DSNode*,
+ CompressedPoolInfo> &Nodes,
+ const TargetData &TD) {
+ // First step, compute the type of the compressed node. This basically
+ // replaces all pointers to compressed pools with uints.
+ NewTy = ComputeCompressedType(Pool->getType(), 0, Nodes);
+
+ // Get the compressed type size.
+ NewSize = NewTy->isSized() ? TD.getTypeSize(NewTy) : 0;
+}
+
+
+/// ComputeCompressedType - Recursively compute the new type for this node after
+/// pointer compression. This involves compressing any pointers that point into
+/// compressed pools.
+const Type *CompressedPoolInfo::
+ComputeCompressedType(const Type *OrigTy, unsigned NodeOffset,
+ std::map<const DSNode*, CompressedPoolInfo> &Nodes) {
+ if (const PointerType *PTY = dyn_cast<PointerType>(OrigTy)) {
+ // Okay, we have a pointer. Check to see if the node pointed to is actually
+ // compressed!
+ //DSNode *PointeeNode = getNode()->getLink(NodeOffset).getNode();
+ //if (PointeeNode && Nodes.count(PointeeNode))
+ return MEMUINTTYPE;
+ // Otherwise, it points to a non-compressed node.
+ return OrigTy;
+ } else if (OrigTy->isFirstClassType() || OrigTy == Type::VoidTy)
+ return OrigTy;
+
+
+ const TargetData &TD = getNode()->getParentGraph()->getTargetData();
+
+ // Okay, we have an aggregate type.
+ if (const StructType *STy = dyn_cast<StructType>(OrigTy)) {
+ std::vector<const Type*> Elements;
+ Elements.reserve(STy->getNumElements());
+
+ const StructLayout *SL = TD.getStructLayout(STy);
+
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ Elements.push_back(ComputeCompressedType(STy->getElementType(i),
+ NodeOffset+SL->MemberOffsets[i],
+ Nodes));
+ return StructType::get(Elements);
+ } else if (const ArrayType *ATy = dyn_cast<ArrayType>(OrigTy)) {
+ return ArrayType::get(ComputeCompressedType(ATy->getElementType(),
+ NodeOffset, Nodes),
+ ATy->getNumElements());
+ } else {
+ std::cerr << "TYPE: " << *OrigTy << "\n";
+ assert(0 && "FIXME: Unhandled aggregate type!");
+ abort();
+ }
+}
+
+/// EmitPoolBaseLoad - Emit code to load the pool base value for this pool
+/// before the specified instruction.
+Value *CompressedPoolInfo::EmitPoolBaseLoad(Instruction &I) const {
+ if (DisablePoolBaseASR) {
+ assert(PoolBase == 0 && "Mixing and matching optimized vs not!");
+
+ // Get the pool base pointer.
+ Constant *Zero = Constant::getNullValue(Type::UIntTy);
+ Value *BasePtrPtr = new GetElementPtrInst(getPoolDesc(), Zero, Zero,
+ "poolbaseptrptr", &I);
+ return new LoadInst(BasePtrPtr, "poolbaseptr", &I);
+ } else {
+ // If this is a pool descriptor passed into the function, and this is the
+ // first use, emit a load of the pool base into the entry of the function.
+ if (PoolBase == 0 && (isa<Argument>(PoolDesc) ||
+ isa<GlobalVariable>(PoolDesc))) {
+ BasicBlock::iterator IP = I.getParent()->getParent()->begin()->begin();
+ while (isa<AllocaInst>(IP)) ++IP;
+ Constant *Zero = Constant::getNullValue(Type::UIntTy);
+ Value *BasePtrPtr = new GetElementPtrInst(getPoolDesc(), Zero, Zero,
+ "poolbaseptrptr", IP);
+ PoolBase = new LoadInst(BasePtrPtr, "poolbaseptr", IP);
+ }
+
+ assert(PoolBase && "Mixing and matching optimized vs not!");
+ return PoolBase;
+ }
+}
+
+
+/// dump - Emit a debugging dump for this pool info.
+///
+void CompressedPoolInfo::dump() const {
+ const TargetData &TD = getNode()->getParentGraph()->getTargetData();
+ std::cerr << " From size: "
+ << (getNode()->getType()->isSized() ?
+ TD.getTypeSize(getNode()->getType()) : 0)
+ << " To size: "
+ << (NewTy->isSized() ? TD.getTypeSize(NewTy) : 0) << "\n";
+ std::cerr << "Node: "; getNode()->dump();
+ std::cerr << "New Type: " << *NewTy << "\n";
+}
+
+
+//===----------------------------------------------------------------------===//
+// InstructionRewriter Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// InstructionRewriter - This class implements the rewriting necessary to
+ /// transform a function body from normal pool allocation to pointer
+ /// compression. It is constructed, then the 'visit' method is called on a
+  /// function. It is responsible for rewriting all instructions that refer to
+ /// pointers into compressed pools.
+ class InstructionRewriter : public llvm::InstVisitor<InstructionRewriter> {
+ /// OldToNewValueMap - This keeps track of what new instructions we create
+ /// for instructions that used to produce pointers into our pool.
+ std::map<Value*, Value*> OldToNewValueMap;
+
+ const PointerCompress::PoolInfoMap &PoolInfo;
+
+ /// TD - The TargetData object for the current target.
+ ///
+ const TargetData &TD;
+
+
+ DSGraph &DSG;
+
+ /// PAFuncInfo - Information about the transformation the pool allocator did
+ /// to the original function.
+ PA::FuncInfo &PAFuncInfo;
+
+ /// FCR - If we are compressing a clone of a pool allocated function (as
+ /// opposed to the pool allocated function itself), this contains
+ /// information about the clone.
+ FunctionCloneRecord *FCR;
+
+ PointerCompress &PtrComp;
+ public:
+ InstructionRewriter(const PointerCompress::PoolInfoMap &poolInfo,
+ DSGraph &dsg, PA::FuncInfo &pafi,
+ FunctionCloneRecord *fcr, PointerCompress &ptrcomp)
+ : PoolInfo(poolInfo), TD(dsg.getTargetData()), DSG(dsg),
+ PAFuncInfo(pafi), FCR(fcr), PtrComp(ptrcomp) {
+ }
+
+ ~InstructionRewriter();
+
+ /// PremapValues - Seed the transformed value map with the specified values.
+ /// This indicates that the first value (a pointer) will map to the second
+ /// value (an integer). When the InstructionRewriter is complete, all of
+ /// the pointers in this vector are deleted.
+ void PremapValues(std::vector<std::pair<Value*, Value*> > &Vals) {
+ for (unsigned i = 0, e = Vals.size(); i != e; ++i)
+ OldToNewValueMap.insert(Vals[i]);
+ }
+
+ /// getTransformedValue - Return the transformed version of the specified
+ /// value, creating a new forward ref value as needed.
+ Value *getTransformedValue(Value *V) {
+ if (isa<ConstantPointerNull>(V)) // null -> uint 0
+ return Constant::getNullValue(SCALARUINTTYPE);
+ if (isa<UndefValue>(V)) // undef -> uint undef
+ return UndefValue::get(SCALARUINTTYPE);
+
+ if (!getNodeIfCompressed(V))
+ assert(getNodeIfCompressed(V) && "Value is not compressed!");
+ Value *&RV = OldToNewValueMap[V];
+ if (RV) return RV;
+
+ RV = new Argument(SCALARUINTTYPE);
+ return RV;
+ }
+
+ /// setTransformedValue - When we create a new value, this method sets it as
+ /// the current value.
+ void setTransformedValue(Instruction &Old, Value *New) {
+ Value *&EV = OldToNewValueMap[&Old];
+ if (EV) {
+ assert(isa<Argument>(EV) && "Not a forward reference!");
+ EV->replaceAllUsesWith(New);
+ delete EV;
+ }
+ EV = New;
+ }
+
+ /// getMappedNodeHandle - Given a pointer value that may be cloned multiple
+ /// times (once for PA, once for PC) return the node handle in DSG, or a
+ /// null descriptor if the value didn't exist.
+ DSNodeHandle getMappedNodeHandle(Value *V) {
+ assert(isa<PointerType>(V->getType()) && "Not a pointer value!");
+
+ // If this is a function clone, map the value to the original function.
+ if (FCR)
+ V = const_cast<Value*>(FCR->getValueInOriginalFunction(V));
+
+ // If this is a pool allocator clone, map the value to the REAL original
+ // function.
+ if (!PAFuncInfo.NewToOldValueMap.empty())
+ if ((V = PAFuncInfo.MapValueToOriginal(V)) == 0)
+ // Value didn't exist in the orig program (pool desc?).
+ return DSNodeHandle();
+
+ return DSG.getNodeForValue(V);
+ }
+
+ /// getNodeIfCompressed - If the specified value is a pointer that will be
+ /// compressed, return the DSNode corresponding to the pool it belongs to.
+ const DSNode *getNodeIfCompressed(Value *V) {
+ if (!isa<PointerType>(V->getType()) || isa<ConstantPointerNull>(V) ||
+ isa<Function>(V))
+ return 0;
+
+ DSNode *N = getMappedNodeHandle(V).getNode();
+ return PoolInfo.count(N) ? N : 0;
+ }
+
+ /// getPoolInfo - Return the pool info for the specified compressed pool.
+ ///
+ const CompressedPoolInfo &getPoolInfo(const DSNode *N) {
+ assert(N && "Pool not compressed!");
+ PointerCompress::PoolInfoMap::const_iterator I = PoolInfo.find(N);
+ assert(I != PoolInfo.end() && "Pool is not compressed!");
+ return I->second;
+ }
+
+ /// getPoolInfo - Return the pool info object for the specified value if the
+ /// pointer points into a compressed pool, otherwise return null.
+ const CompressedPoolInfo *getPoolInfo(Value *V) {
+ if (const DSNode *N = getNodeIfCompressed(V))
+ return &getPoolInfo(N);
+ return 0;
+ }
+
+ /// getPoolInfoForPoolDesc - Given a pool descriptor as a Value*, return the
+ /// pool info for the pool if it is compressed.
+ const CompressedPoolInfo *getPoolInfoForPoolDesc(Value *PD) const {
+ for (PointerCompress::PoolInfoMap::const_iterator I = PoolInfo.begin(),
+ E = PoolInfo.end(); I != E; ++I)
+ if (I->second.getPoolDesc() == PD)
+ return &I->second;
+ return 0;
+ }
+
+ /// ValueRemoved - Whenever we remove a value from the current function,
+ /// update any maps that contain that pointer so we don't have stale
+ /// pointers hanging around.
+ void ValueRemoved(Value *V) {
+ if (FCR) {
+ // If this is in a pointer-compressed clone, update our map.
+ FCR->NewToOldValueMap.erase(V);
+ } else if (!PAFuncInfo.NewToOldValueMap.empty()) {
+ // Otherwise if this exists in a pool allocator clone, update it now.
+ PAFuncInfo.NewToOldValueMap.erase(V);
+ } else {
+ // Otherwise if this was in the original function, remove it from the
+ // DSG scalar map if it is there.
+ DSG.getScalarMap().eraseIfExists(V);
+ }
+ }
+
+ /// ValueReplaced - Whenever we replace a value from the current function,
+ /// update any maps that contain that value so we don't have stale pointers
+ /// hanging around.
+ void ValueReplaced(Value &Old, Value *New) {
+ // If this value exists in a pointer compress clone, update it now.
+ if (FCR) {
+ std::map<Value*, const Value*>::iterator I =
+ FCR->NewToOldValueMap.find(&Old);
+ assert(I != FCR->NewToOldValueMap.end() && "Didn't find element!?");
+ FCR->NewToOldValueMap.insert(std::make_pair(New, I->second));
+ FCR->NewToOldValueMap.erase(I);
+ } else if (!PAFuncInfo.NewToOldValueMap.empty()) {
+ // Otherwise if this exists in a pool allocator clone, update it now.
+ PA::FuncInfo::NewToOldValueMapTy::iterator I =
+ PAFuncInfo.NewToOldValueMap.find(&Old);
+ if (I != PAFuncInfo.NewToOldValueMap.end()) {
+ PAFuncInfo.NewToOldValueMap[New] = I->second;
+ PAFuncInfo.NewToOldValueMap.erase(I);
+ }
+
+ } else {
+ // Finally, if this occurred in a function that neither the pool
+ // allocator nor the ptr compression implementation had to change,
+ // update the DSGraph.
+ if (DSG.getScalarMap().count(&Old))
+ DSG.getScalarMap().replaceScalar(&Old, New);
+ }
+ }
+
+ //===------------------------------------------------------------------===//
+ // Visitation methods. These do all of the heavy lifting for the various
+ // cases we have to handle.
+
+ void visitReturnInst(ReturnInst &RI);
+ void visitCastInst(CastInst &CI);
+ void visitPHINode(PHINode &PN);
+ void visitSelectInst(SelectInst &SI);
+ void visitSetCondInst(SetCondInst &SCI);
+ void visitGetElementPtrInst(GetElementPtrInst &GEPI);
+ void visitLoadInst(LoadInst &LI);
+ void visitStoreInst(StoreInst &SI);
+
+ void visitCallInst(CallInst &CI);
+ void visitPoolInit(CallInst &CI);
+ void visitPoolAlloc(CallInst &CI);
+ void visitPoolDestroy(CallInst &CI);
+
+ void visitInstruction(Instruction &I) {
+#ifndef NDEBUG
+ bool Unhandled = !!getNodeIfCompressed(&I);
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+ Unhandled |= !!getNodeIfCompressed(I.getOperand(i));
+
+ if (Unhandled) {
+ std::cerr << "ERROR: UNHANDLED INSTRUCTION: " << I;
+ //assert(0);
+ //abort();
+ }
+#endif
+ }
+ };
+} // end anonymous namespace.
+
+
+InstructionRewriter::~InstructionRewriter() {
+ // Nuke all of the old values from the program.
+ for (std::map<Value*, Value*>::iterator I = OldToNewValueMap.begin(),
+ E = OldToNewValueMap.end(); I != E; ++I) {
+ assert((!isa<Argument>(I->second) || cast<Argument>(I->second)->getParent())
+ && "ERROR: Unresolved value still left in the program!");
+ // If there is anything still using this, provide a temporary value.
+ if (!I->first->use_empty())
+ I->first->replaceAllUsesWith(UndefValue::get(I->first->getType()));
+
+ // Finally, remove it from the program.
+ if (Instruction *Inst = dyn_cast<Instruction>(I->first)) {
+ ValueRemoved(Inst);
+ Inst->eraseFromParent();
+ } else if (Argument *Arg = dyn_cast<Argument>(I->first)) {
+ assert(Arg->getParent() == 0 && "Unexpected argument type here!");
+ delete Arg; // Marker node used when cloning.
+ } else {
+ assert(0 && "Unknown entry in this map!");
+ }
+ }
+}
+
+void InstructionRewriter::visitReturnInst(ReturnInst &RI) {
+ if (RI.getNumOperands() && isa<PointerType>(RI.getOperand(0)->getType()))
+ if (!isa<PointerType>(RI.getParent()->getParent()->getReturnType())) {
+ // Compressing the return value.
+ new ReturnInst(getTransformedValue(RI.getOperand(0)), &RI);
+ RI.eraseFromParent();
+ }
+}
+
+
+void InstructionRewriter::visitCastInst(CastInst &CI) {
+ if (!isa<PointerType>(CI.getType())) {
+ // If this is a pointer -> integer cast, turn this into an idx -> integer
+ // cast.
+ if (isa<PointerType>(CI.getOperand(0)->getType()) &&
+ getPoolInfo(CI.getOperand(0)))
+ CI.setOperand(0, getTransformedValue(CI.getOperand(0)));
+ return;
+ }
+
+ const CompressedPoolInfo *PI = getPoolInfo(&CI);
+ if (!PI) return;
+ assert(getPoolInfo(CI.getOperand(0)) == PI && "Not cast from ptr -> ptr?");
+
+ // A cast from one pointer to another turns into a cast from uint -> uint,
+ // which is a noop.
+ setTransformedValue(CI, getTransformedValue(CI.getOperand(0)));
+}
+
+void InstructionRewriter::visitPHINode(PHINode &PN) {
+ const CompressedPoolInfo *DestPI = getPoolInfo(&PN);
+ if (DestPI == 0) return;
+
+ PHINode *New = new PHINode(SCALARUINTTYPE, PN.getName(), &PN);
+ New->reserveOperandSpace(PN.getNumIncomingValues());
+
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+ New->addIncoming(getTransformedValue(PN.getIncomingValue(i)),
+ PN.getIncomingBlock(i));
+ setTransformedValue(PN, New);
+}
+
+void InstructionRewriter::visitSelectInst(SelectInst &SI) {
+ const CompressedPoolInfo *DestPI = getPoolInfo(&SI);
+ if (DestPI == 0) return;
+
+ setTransformedValue(SI, new SelectInst(SI.getOperand(0),
+ getTransformedValue(SI.getOperand(1)),
+ getTransformedValue(SI.getOperand(2)),
+ SI.getName(), &SI));
+}
+
+void InstructionRewriter::visitSetCondInst(SetCondInst &SCI) {
+ if (!isa<PointerType>(SCI.getOperand(0)->getType())) return;
+ Value *NonNullPtr = SCI.getOperand(0);
+ if (isa<ConstantPointerNull>(NonNullPtr)) {
+ NonNullPtr = SCI.getOperand(1);
+ if (isa<ConstantPointerNull>(NonNullPtr))
+ return; // setcc null, null
+ }
+
+ const CompressedPoolInfo *SrcPI = getPoolInfo(NonNullPtr);
+ if (SrcPI == 0) return; // comparing non-compressed pointers.
+
+ std::string Name = SCI.getName(); SCI.setName("");
+ Value *New = new SetCondInst(SCI.getOpcode(),
+ getTransformedValue(SCI.getOperand(0)),
+ getTransformedValue(SCI.getOperand(1)),
+ Name, &SCI);
+ SCI.replaceAllUsesWith(New);
+ ValueReplaced(SCI, New);
+ SCI.eraseFromParent();
+}
+
+void InstructionRewriter::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+ const CompressedPoolInfo *PI = getPoolInfo(&GEPI);
+ if (PI == 0) return;
+
+ // Get the base index.
+ Value *Val = getTransformedValue(GEPI.getOperand(0));
+
+ bool AllZeros = true;
+ for (unsigned i = 1, e = GEPI.getNumOperands(); i != e; ++i)
+ if (!isa<Constant>(GEPI.getOperand(i)) ||
+ !cast<Constant>(GEPI.getOperand(i))->isNullValue()) {
+ AllZeros = false;
+ break;
+ }
+ if (AllZeros) {
+ // We occasionally get non-type-matching GEP instructions with zeros. These
+ // are effectively pointer casts, so treat them as such.
+ setTransformedValue(GEPI, Val);
+ return;
+ }
+
+ // The compressed type for the pool. FIXME: NOTE: This only works if 'Val'
+ // pointed to the start of a node!
+ const Type *NTy = PointerType::get(PI->getNewType());
+
+ gep_type_iterator GTI = gep_type_begin(GEPI), E = gep_type_end(GEPI);
+ for (unsigned i = 1, e = GEPI.getNumOperands(); i != e; ++i, ++GTI) {
+ Value *Idx = GEPI.getOperand(i);
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ unsigned Field = (unsigned)cast<ConstantUInt>(Idx)->getValue();
+ if (Field) {
+ uint64_t FieldOffs = TD.getStructLayout(cast<StructType>(NTy))
+ ->MemberOffsets[Field];
+ Constant *FieldOffsCst = ConstantUInt::get(SCALARUINTTYPE, FieldOffs);
+ Val = BinaryOperator::createAdd(Val, FieldOffsCst,
+ GEPI.getName(), &GEPI);
+ }
+
+ // If this is a one element struct, NTy may not have the structure type.
+ if (STy->getNumElements() > 1 ||
+ (isa<StructType>(NTy) &&
+ cast<StructType>(NTy)->getNumElements() == 1))
+ NTy = cast<StructType>(NTy)->getElementType(Field);
+ } else {
+ assert(isa<SequentialType>(*GTI) && "Not struct or sequential?");
+ const SequentialType *STy = cast<SequentialType>(*GTI);
+ if (!isa<Constant>(Idx) || !cast<Constant>(Idx)->isNullValue()) {
+ // Add Idx*sizeof(NewElementType) to the index.
+ const Type *ElTy = cast<SequentialType>(NTy)->getElementType();
+ if (Idx->getType() != SCALARUINTTYPE)
+ Idx = new CastInst(Idx, SCALARUINTTYPE, Idx->getName(), &GEPI);
+
+ Constant *Scale = ConstantUInt::get(SCALARUINTTYPE,
+ TD.getTypeSize(ElTy));
+ Idx = BinaryOperator::createMul(Idx, Scale, "fieldidx", &GEPI);
+ Val = BinaryOperator::createAdd(Val, Idx, GEPI.getName(), &GEPI);
+ }
+
+ // If this is a one element array type, NTy may not reflect the array.
+ if (!isa<ArrayType>(STy) || cast<ArrayType>(STy)->getNumElements() != 1 ||
+ (isa<ArrayType>(NTy) && cast<ArrayType>(NTy)->getNumElements() == 1))
+ NTy = cast<SequentialType>(NTy)->getElementType();
+ }
+ }
+
+ setTransformedValue(GEPI, Val);
+}
+
+void InstructionRewriter::visitLoadInst(LoadInst &LI) {
+ const CompressedPoolInfo *SrcPI = getPoolInfo(LI.getOperand(0));
+ if (SrcPI == 0) {
+    // If we are loading a compressed pointer from a non-compressed memory
+ // object, retain the load, but cast from the pointer type to our scalar
+ // type.
+ if (getPoolInfo(&LI)) {
+ Value *NLI = new LoadInst(LI.getOperand(0), LI.getName()+".cp", &LI);
+ Value *NC = new CastInst(NLI, SCALARUINTTYPE, NLI->getName(), &LI);
+ setTransformedValue(LI, NC);
+ }
+ return;
+ }
+
+ // We care about two cases, here:
+ // 1. Loading a normal value from a ptr compressed data structure.
+ // 2. Loading a compressed ptr from a ptr compressed data structure.
+ bool LoadingCompressedPtr = getNodeIfCompressed(&LI) != 0;
+
+ Value *BasePtr = SrcPI->EmitPoolBaseLoad(LI);
+
+ // Get the pointer to load from.
+ std::vector<Value*> Ops;
+ Ops.push_back(getTransformedValue(LI.getOperand(0)));
+ if (Ops[0]->getType() == Type::UShortTy)
+ Ops[0] = new CastInst(Ops[0], Type::UIntTy, "extend_idx", &LI);
+ Value *SrcPtr = new GetElementPtrInst(BasePtr, Ops,
+ LI.getOperand(0)->getName()+".pp", &LI);
+ const Type *DestTy = LoadingCompressedPtr ? MEMUINTTYPE : LI.getType();
+ SrcPtr = new CastInst(SrcPtr, PointerType::get(DestTy),
+ SrcPtr->getName(), &LI);
+ std::string OldName = LI.getName(); LI.setName("");
+ Value *NewLoad = new LoadInst(SrcPtr, OldName, &LI);
+
+ if (LoadingCompressedPtr) {
+ // Convert from MEMUINTTYPE to SCALARUINTTYPE if different.
+ if (MEMUINTTYPE != SCALARUINTTYPE)
+ NewLoad = new CastInst(NewLoad, SCALARUINTTYPE, NewLoad->getName(), &LI);
+
+ setTransformedValue(LI, NewLoad);
+ } else {
+ LI.replaceAllUsesWith(NewLoad);
+ ValueReplaced(LI, NewLoad);
+ LI.eraseFromParent();
+ }
+}
+
+
+
+void InstructionRewriter::visitStoreInst(StoreInst &SI) {
+ const CompressedPoolInfo *DestPI = getPoolInfo(SI.getOperand(1));
+ if (DestPI == 0) {
+ // If we are storing a compressed pointer into uncompressed memory, just
+ // cast the index to a pointer type and store that.
+ if (getPoolInfo(SI.getOperand(0))) {
+ Value *SrcVal = getTransformedValue(SI.getOperand(0));
+ SrcVal = new CastInst(SrcVal, SI.getOperand(0)->getType(),
+ SrcVal->getName(), &SI);
+ SI.setOperand(0, SrcVal);
+ }
+ return;
+ }
+
+ // We care about two cases, here:
+ // 1. Storing a normal value into a ptr compressed data structure.
+ // 2. Storing a compressed ptr into a ptr compressed data structure. Note
+ // that we cannot use the src value to decide if this is a compressed
+ // pointer if it's a null pointer. We have to try harder.
+ //
+ Value *SrcVal = SI.getOperand(0);
+ if (!isa<ConstantPointerNull>(SrcVal)) {
+ if (const CompressedPoolInfo *SrcPI = getPoolInfo(SrcVal)) {
+ // If the stored value is compressed, get the xformed version
+ SrcVal = getTransformedValue(SrcVal);
+
+ // If SCALAR type is not the MEM type, reduce it now.
+ if (SrcVal->getType() != MEMUINTTYPE)
+ SrcVal = new CastInst(SrcVal, MEMUINTTYPE, SrcVal->getName(), &SI);
+ }
+ } else {
+ // FIXME: This assumes that all null pointers are compressed!
+ SrcVal = Constant::getNullValue(MEMUINTTYPE);
+ }
+
+ // Get the pool base pointer.
+ Value *BasePtr = DestPI->EmitPoolBaseLoad(SI);
+
+ // Get the pointer to store to.
+ std::vector<Value*> Ops;
+ Ops.push_back(getTransformedValue(SI.getOperand(1)));
+ if (Ops[0]->getType() == Type::UShortTy)
+ Ops[0] = new CastInst(Ops[0], Type::UIntTy, "extend_idx", &SI);
+
+ Value *DestPtr = new GetElementPtrInst(BasePtr, Ops,
+ SI.getOperand(1)->getName()+".pp",
+ &SI);
+ DestPtr = new CastInst(DestPtr, PointerType::get(SrcVal->getType()),
+ DestPtr->getName(), &SI);
+ new StoreInst(SrcVal, DestPtr, &SI);
+
+ // Finally, explicitly remove the store from the program, as it does not
+ // produce a pointer result.
+ SI.eraseFromParent();
+}
+
+
+void InstructionRewriter::visitPoolInit(CallInst &CI) {
+ // Transform to poolinit_pc if this is initializing a pool that we are
+ // compressing.
+ const CompressedPoolInfo *PI = getPoolInfoForPoolDesc(CI.getOperand(1));
+ if (PI == 0) return; // Pool isn't compressed.
+
+ std::vector<Value*> Ops;
+ Ops.push_back(CI.getOperand(1));
+ // Transform to pass in the compressed size.
+ Ops.push_back(ConstantUInt::get(Type::UIntTy, PI->getNewSize()));
+
+ // Pointer compression can reduce the alignment restriction to 4 bytes from 8.
+ // Reevaluate the desired alignment.
+ Ops.push_back(ConstantUInt::get(Type::UIntTy,
+ PA::Heuristic::getRecommendedAlignment(PI->getNewType(), TD)));
+ // TODO: Compression could reduce the alignment restriction for the pool!
+ Value *PB = new CallInst(PtrComp.PoolInitPC, Ops, "", &CI);
+
+ if (!DisablePoolBaseASR) { // Load the pool base immediately.
+ PB->setName(CI.getOperand(1)->getName()+".poolbase");
+ // Remember the pool base for this pool.
+ PI->setPoolBase(PB);
+ }
+
+ CI.eraseFromParent();
+}
+
+void InstructionRewriter::visitPoolDestroy(CallInst &CI) {
+ // Transform to pooldestroy_pc if this is destroying a pool that we are
+ // compressing.
+ const CompressedPoolInfo *PI = getPoolInfoForPoolDesc(CI.getOperand(1));
+ if (PI == 0) return; // Pool isn't compressed.
+
+ std::vector<Value*> Ops;
+ Ops.push_back(CI.getOperand(1));
+ new CallInst(PtrComp.PoolDestroyPC, Ops, "", &CI);
+ CI.eraseFromParent();
+}
+
+void InstructionRewriter::visitPoolAlloc(CallInst &CI) {
+ const CompressedPoolInfo *PI = getPoolInfo(&CI);
+ if (PI == 0) return; // Pool isn't compressed.
+
+ std::vector<Value*> Ops;
+ Ops.push_back(CI.getOperand(1)); // PD
+
+ Value *Size = CI.getOperand(2);
+
+ // If there was a recommended size, shrink it down now.
+ if (unsigned OldSizeV = PA::Heuristic::getRecommendedSize(PI->getNode()))
+ if (OldSizeV != PI->getNewSize()) {
+ // Emit code to scale the allocated size down by the old size then up by
+ // the new size. We actually compute (N+OS-1)/OS * NS.
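+      // Illustrative example: with OldSize = 12 and NewSize = 8, a request of
+      // 120 bytes becomes ((120 + 11) / 12) * 8 == 80 bytes.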
+ Value *OldSize = ConstantUInt::get(Type::UIntTy, OldSizeV);
+ Value *NewSize = ConstantUInt::get(Type::UIntTy, PI->getNewSize());
+
+ Size = BinaryOperator::createAdd(Size,
+ ConstantUInt::get(Type::UIntTy, OldSizeV-1),
+ "roundup", &CI);
+ Size = BinaryOperator::createDiv(Size, OldSize, "numnodes", &CI);
+ Size = BinaryOperator::createMul(Size, NewSize, "newbytes", &CI);
+ }
+
+ Ops.push_back(Size);
+ Value *NC = new CallInst(PtrComp.PoolAllocPC, Ops, CI.getName(), &CI);
+ setTransformedValue(CI, NC);
+}
+
+
+void InstructionRewriter::visitCallInst(CallInst &CI) {
+ if (Function *F = CI.getCalledFunction())
+ // These functions are handled specially.
+ if (F->getName() == "poolinit") {
+ visitPoolInit(CI);
+ return;
+ } else if (F->getName() == "pooldestroy") {
+ visitPoolDestroy(CI);
+ return;
+ } else if (F->getName() == "poolalloc") {
+ visitPoolAlloc(CI);
+ return;
+ }
+
+ // Normal function call: check to see if this call produces or uses a pointer
+ // into a compressed pool. If so, we will need to transform the callee or use
+ // a previously transformed version.
+
+ // PoolsToCompress - Keep track of which pools we are supposed to compress,
+ // with the nodes from the callee's graph.
+ std::set<const DSNode*> PoolsToCompress;
+
+ // If this is a direct call, get the information about the callee.
+ PA::FuncInfo *FI = 0;
+ const DSGraph *CG = 0;
+ Function *Callee = CI.getCalledFunction();
+ if (Callee)
+    if ((FI = PtrComp.getPoolAlloc()->getFuncInfoOrClone(*Callee)))
+ CG = &PtrComp.getGraphForFunc(FI);
+
+ if (!Callee) {
+    // Indirect call: you CAN'T pass compressed pointers in. Don't even think
+ // about it.
+ return;
+ } else if (Callee->isExternal()) {
+ // We don't have a DSG for the callee in this case. Assume that things will
+ // work out if we pass compressed pointers.
+ std::vector<Value*> Operands;
+ Operands.reserve(CI.getNumOperands()-1);
+
+ // If this is one of the functions we know about, just materialize the
+ // compressed pointer as a real pointer, and pass it.
+ if (Callee->getName() == "printf") {
+ for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i)
+ if (isa<PointerType>(CI.getOperand(i)->getType()) &&
+ getPoolInfo(CI.getOperand(i)))
+ CI.setOperand(i, getTransformedValue(CI.getOperand(i)));
+ return;
+ } else if (Callee->getName() == "llvm.memset") {
+ if (const CompressedPoolInfo *DestPI = getPoolInfo(CI.getOperand(1))) {
+ std::vector<Value*> Ops;
+ Ops.push_back(getTransformedValue(CI.getOperand(1)));
+ Value *BasePtr = DestPI->EmitPoolBaseLoad(CI);
+ Value *SrcPtr = new GetElementPtrInst(BasePtr, Ops,
+ CI.getOperand(1)->getName()+".pp", &CI);
+ SrcPtr = new CastInst(SrcPtr, CI.getOperand(1)->getType(), "", &CI);
+ CI.setOperand(1, SrcPtr);
+ return;
+ }
+ }
+
+
+ std::vector<unsigned> CompressedArgs;
+ if (isa<PointerType>(CI.getType()) && getPoolInfo(&CI))
+ CompressedArgs.push_back(0); // Compress retval.
+
+ for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i)
+ if (isa<PointerType>(CI.getOperand(i)->getType()) &&
+ getPoolInfo(CI.getOperand(i))) {
+ CompressedArgs.push_back(i);
+ Operands.push_back(getTransformedValue(CI.getOperand(i)));
+ } else {
+ Operands.push_back(CI.getOperand(i));
+ }
+
+ if (CompressedArgs.empty()) {
+ PtrComp.NoArgFunctionsCalled.push_back(Callee);
+ return; // Nothing to compress!
+ }
+
+ Function *Clone = PtrComp.GetExtFunctionClone(Callee, CompressedArgs);
+ Value *NC = new CallInst(Clone, Operands, CI.getName(), &CI);
+ if (NC->getType() != CI.getType()) // Compressing return value?
+ setTransformedValue(CI, NC);
+ else {
+ if (CI.getType() != Type::VoidTy)
+ CI.replaceAllUsesWith(NC);
+ ValueReplaced(CI, NC);
+ CI.eraseFromParent();
+ }
+ return;
+ }
+
+ // CalleeCallerMap: Mapping from nodes in the callee to nodes in the caller.
+ DSGraph::NodeMapTy CalleeCallerMap;
+
+ // Do we need to compress the return value?
+ if (isa<PointerType>(CI.getType()))
+ DSGraph::computeNodeMapping(CG->getReturnNodeFor(FI->F),
+ getMappedNodeHandle(&CI), CalleeCallerMap);
+
+ // Find the arguments we need to compress.
+ unsigned NumPoolArgs = FI ? FI->ArgNodes.size() : 0;
+ for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i)
+ if (isa<PointerType>(CI.getOperand(i)->getType()) && i > NumPoolArgs) {
+ Argument *FormalArg = next(FI->F.arg_begin(), i-1-NumPoolArgs);
+
+ DSGraph::computeNodeMapping(CG->getNodeForValue(FormalArg),
+ getMappedNodeHandle(CI.getOperand(i)),
+ CalleeCallerMap);
+ }
+
+ // Now that we know the basic pools passed/returned through the
+ // argument/retval of the call, add the compressed pools that are reachable
+ // from them. The CalleeCallerMap contains a mapping from callee nodes to the
+ // caller nodes they correspond to (a many-to-one mapping).
+ for (DSGraph::NodeMapTy::iterator I = CalleeCallerMap.begin(),
+ E = CalleeCallerMap.end(); I != E; ++I) {
+ // If the destination is compressed, so should the source be.
+ if (PoolInfo.count(I->second.getNode()))
+ PoolsToCompress.insert(I->first);
+ }
+
+ // If this function doesn't require compression, there is nothing to do!
+ if (PoolsToCompress.empty()) return;
+
+ // Get the clone of this function that uses compressed pointers instead of
+ // normal pointers.
+ Function *Clone = PtrComp.GetFunctionClone(Callee, PoolsToCompress,
+ *FI, *CG);
+
+
+ // Okay, we now have our clone: rewrite the call instruction.
+ std::vector<Value*> Operands;
+ Operands.reserve(CI.getNumOperands()-1);
+
+ Function::arg_iterator AI = FI->F.arg_begin();
+
+ // Pass pool descriptors.
+ for (unsigned i = 1; i != NumPoolArgs+1; ++i)
+ Operands.push_back(CI.getOperand(i));
+
+ for (unsigned i = NumPoolArgs+1, e = CI.getNumOperands(); i != e; ++i, ++AI)
+ if (isa<PointerType>(CI.getOperand(i)->getType()) &&
+ PoolsToCompress.count(CG->getNodeForValue(AI).getNode()))
+ Operands.push_back(getTransformedValue(CI.getOperand(i)));
+ else
+ Operands.push_back(CI.getOperand(i));
+
+ Value *NC = new CallInst(Clone, Operands, CI.getName(), &CI);
+ if (NC->getType() != CI.getType()) // Compressing return value?
+ setTransformedValue(CI, NC);
+ else {
+ if (CI.getType() != Type::VoidTy)
+ CI.replaceAllUsesWith(NC);
+ ValueReplaced(CI, NC);
+ CI.eraseFromParent();
+ }
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// PointerCompress Implementation
+//===----------------------------------------------------------------------===//
+
+void PointerCompress::getAnalysisUsage(AnalysisUsage &AU) const {
+ // Need information about how pool allocation happened.
+ AU.addRequired<PoolAllocatePassAllPools>();
+
+ // Need information from DSA.
+ AU.addRequired<EquivClassGraphs>();
+}
+
+/// PoolIsCompressible - Return true if we can pointer compress this node.
+/// If not, we should DEBUG print out why.
+static bool PoolIsCompressible(const DSNode *N) {
+ assert(!N->isForwarding() && "Should not be dealing with merged nodes!");
+ if (N->isNodeCompletelyFolded()) {
+ DEBUG(std::cerr << "Node is not type-safe:\n");
+ return false;
+ }
+
+ // FIXME: If any non-type-safe nodes point to this one, we cannot compress it.
+#if 0
+ bool HasFields = false;
+ for (DSNode::const_edge_iterator I = N->edge_begin(), E = N->edge_end();
+ I != E; ++I)
+ if (!I->isNull()) {
+ HasFields = true;
+ if (I->getNode() != N) {
+ // We currently only handle trivially self cyclic DS's right now.
+ DEBUG(std::cerr << "Node points to nodes other than itself:\n");
+ return false;
+ }
+ }
+
+ if (!HasFields) {
+ DEBUG(std::cerr << "Node does not contain any pointers to compress:\n");
+ return false;
+ }
+#endif
+
+ if ((N->getNodeFlags() & DSNode::Composition) != DSNode::HeapNode) {
+ DEBUG(std::cerr << "Node contains non-heap values:\n");
+ return false;
+ }
+
+ return true;
+}
+
+/// FindPoolsToCompress - Inspect the specified function and find compressible
+/// pools that are homed in that function.  Return those pools in the
+/// Pools set.
+void PointerCompress::FindPoolsToCompress(std::set<const DSNode*> &Pools,
+ std::map<const DSNode*,
+ Value*> &PreassignedPools,
+ Function &F, DSGraph &DSG,
+ PA::FuncInfo *FI) {
+ DEBUG(std::cerr << "In function '" << F.getName() << "':\n");
+ for (unsigned i = 0, e = FI->NodesToPA.size(); i != e; ++i) {
+ const DSNode *N = FI->NodesToPA[i];
+
+ // Ignore potential pools that the pool allocation heuristic decided not to
+    // pool allocate.
+ if (!isa<ConstantPointerNull>(FI->PoolDescriptors[N]))
+ if (PoolIsCompressible(N)) {
+ Pools.insert(N);
+ ++NumCompressed;
+ } else {
+ DEBUG(std::cerr << "PCF: "; N->dump());
+ ++NumNotCompressed;
+ }
+ }
+
+ // If there are no compressed global pools, don't bother to look for them.
+ if (CompressedGlobalPools.empty()) return;
+
+ // Calculate which DSNodes are reachable from globals. If a node is reachable
+ // from a global, we check to see if the global pool is compressed.
+ DSGraph &GG = ECG->getGlobalsGraph();
+
+  // Map the nodes in this graph to the corresponding nodes in the globals
+  // graph.
+ DSGraph::NodeMapTy GlobalsGraphNodeMapping;
+ DSG.computeGToGGMapping(GlobalsGraphNodeMapping);
+
+ // See if there are nodes in this graph that correspond to nodes in the
+  // globals graph, and if so, whether the global pool is compressed.
+ for (DSGraph::node_iterator I = DSG.node_begin(), E = DSG.node_end();
+ I != E;++I)
+ if (GlobalsGraphNodeMapping.count(I)) {
+ // If it is a global pool, set up the pool descriptor appropriately.
+ DSNode *GGN = GlobalsGraphNodeMapping[I].getNode();
+ if (CompressedGlobalPools.count(GGN)) {
+ Pools.insert(I);
+ PreassignedPools[I] = CompressedGlobalPools[GGN];
+ }
+ }
+}
+
+
+/// CompressPoolsInFunction - Find all pools that are compressible in this
+/// function and compress them.
+bool PointerCompress::
+CompressPoolsInFunction(Function &F,
+ std::vector<std::pair<Value*, Value*> > *PremappedVals,
+ std::set<const DSNode*> *ExternalPoolsToCompress){
+ if (F.isExternal()) return false;
+
+  // If this is a pointer compressed clone of a pool allocated function, get
+  // the pool allocated function.  Rewriting a clone means that there are
+ // incoming arguments that point into compressed pools.
+ FunctionCloneRecord *FCR = getCloneInfo(F);
+ Function *CloneSource = FCR ? FCR->PAFn : 0;
+
+ PA::FuncInfo *FI;
+ if (CloneSource)
+ FI = PoolAlloc->getFuncInfoOrClone(*CloneSource);
+ else
+ FI = PoolAlloc->getFuncInfoOrClone(F);
+
+ if (FI == 0) {
+ std::cerr << "DIDN'T FIND POOL INFO FOR: "
+ << *F.getType() << F.getName() << "!\n";
+ return false;
+ }
+
+ // If this function was cloned, and this is the original function, ignore it
+ // (it's dead). We'll deal with the cloned version later when we run into it
+ // again.
+ if (FI->Clone && &FI->F == &F)
+ return false;
+
+ // Get the DSGraph for this function.
+ DSGraph &DSG = ECG->getDSGraph(FI->F);
+
+ std::set<const DSNode*> PoolsToCompressSet;
+
+ // Compute the set of compressible pools in this function that are hosted
+ // here.
+ std::map<const DSNode*, Value*> PreassignedPools;
+ FindPoolsToCompress(PoolsToCompressSet, PreassignedPools, F, DSG, FI);
+
+ // Handle pools that are passed into the function through arguments or
+ // returned by the function. If this occurs, we must be dealing with a ptr
+ // compressed clone of the pool allocated clone of the original function.
+ if (ExternalPoolsToCompress)
+ PoolsToCompressSet.insert(ExternalPoolsToCompress->begin(),
+ ExternalPoolsToCompress->end());
+
+ // If there is nothing that we can compress, exit now.
+ if (PoolsToCompressSet.empty()) return false;
+
+ // Compute the initial collection of compressed pointer infos.
+ std::map<const DSNode*, CompressedPoolInfo> PoolsToCompress;
+
+ for (std::set<const DSNode*>::iterator I = PoolsToCompressSet.begin(),
+ E = PoolsToCompressSet.end(); I != E; ++I) {
+ Value *PD;
+ if (Value *PAPD = PreassignedPools[*I])
+ PD = PAPD; // Must be a global pool.
+ else if (FCR)
+ PD = FCR->PoolDescriptors.find(*I)->second;
+ else
+ PD = FI->PoolDescriptors[*I];
+ assert(PD && "No pool descriptor available for this pool???");
+
+ PoolsToCompress.insert(std::make_pair(*I, CompressedPoolInfo(*I, PD)));
+ }
+
+ // Use these to compute the closure of compression information. In
+ // particular, if one pool points to another, we need to know if the outgoing
+ // pointer is compressed.
+ const TargetData &TD = DSG.getTargetData();
+ std::cerr << "In function '" << F.getName() << "':\n";
+ for (std::map<const DSNode*, CompressedPoolInfo>::iterator
+ I = PoolsToCompress.begin(), E = PoolsToCompress.end(); I != E; ++I) {
+
+ I->second.Initialize(PoolsToCompress, TD);
+
+ // Only dump info about a compressed pool if this is the home for it.
+ if (isa<AllocaInst>(I->second.getPoolDesc()) ||
+ (isa<GlobalValue>(I->second.getPoolDesc()) &&
+ F.hasExternalLinkage() && F.getName() == "main")) {
+ std::cerr << " COMPRESSING POOL:\nPCS:";
+ I->second.dump();
+ }
+ }
+
+ // Finally, rewrite the function body to use compressed pointers!
+ InstructionRewriter IR(PoolsToCompress, DSG, *FI, FCR, *this);
+ if (PremappedVals)
+ IR.PremapValues(*PremappedVals);
+ IR.visit(F);
+ return true;
+}
+
+
+/// GetExtFunctionClone - Return a clone of the specified external function with
+/// the specified arguments compressed.
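+/// ArgsToComp lists the positions to compress in ascending order: an entry of
+/// 0 denotes the return value, and an entry i > 0 denotes the i'th call operand.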
+Function *PointerCompress::
+GetExtFunctionClone(Function *F, const std::vector<unsigned> &ArgsToComp) {
+ assert(!ArgsToComp.empty() && "No reason to make a clone!");
+ Function *&Clone = ExtCloneFunctionMap[std::make_pair(F, ArgsToComp)];
+ if (Clone) return Clone;
+
+ const FunctionType *FTy = F->getFunctionType();
+ const Type *RetTy = FTy->getReturnType();
+ unsigned ArgIdx = 0;
+ if (isa<PointerType>(RetTy) && ArgsToComp[0] == 0) {
+ RetTy = SCALARUINTTYPE;
+ ++ArgIdx;
+ }
+
+ std::vector<const Type*> ParamTypes;
+
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+ if (ArgIdx < ArgsToComp.size() && ArgsToComp[ArgIdx]-1 == i) {
+ // Compressed pool, pass an index.
+ ParamTypes.push_back(SCALARUINTTYPE);
+ ++ArgIdx;
+ } else {
+ ParamTypes.push_back(FTy->getParamType(i));
+ }
+ FunctionType *CFTy = FunctionType::get(RetTy, ParamTypes, FTy->isVarArg());
+
+ // Next, create the clone prototype and insert it into the module.
+ Clone = new Function(CFTy, GlobalValue::ExternalLinkage,
+ F->getName()+"_pc");
+ F->getParent()->getFunctionList().insert(F, Clone);
+ return Clone;
+}
+
+/// GetFunctionClone - Lazily create clones of pool allocated functions that we
+/// need in compressed form. This memoizes the functions that have been cloned
+/// to allow only one clone of each function in a desired permutation.
+Function *PointerCompress::
+GetFunctionClone(Function *F, std::set<const DSNode*> &PoolsToCompress,
+ PA::FuncInfo &FI, const DSGraph &CG) {
+ assert(!PoolsToCompress.empty() && "No clone needed!");
+
+ // Check to see if we have already compressed this function, if so, there is
+ // no need to make another clone. This is also important to avoid infinite
+ // recursion.
+ Function *&Clone = ClonedFunctionMap[std::make_pair(F, PoolsToCompress)];
+ if (Clone) return Clone;
+
+ // First step, construct the new function prototype.
+ const FunctionType *FTy = F->getFunctionType();
+ const Type *RetTy = FTy->getReturnType();
+ if (isa<PointerType>(RetTy) &&
+ PoolsToCompress.count(CG.getReturnNodeFor(FI.F).getNode())) {
+ RetTy = SCALARUINTTYPE;
+ }
+ std::vector<const Type*> ParamTypes;
+ unsigned NumPoolArgs = FI.ArgNodes.size();
+
+ // Pass all pool args unmodified.
+ for (unsigned i = 0; i != NumPoolArgs; ++i)
+ ParamTypes.push_back(FTy->getParamType(i));
+
+ Function::arg_iterator AI = FI.F.arg_begin();
+ for (unsigned i = NumPoolArgs, e = FTy->getNumParams(); i != e; ++i, ++AI)
+ if (isa<PointerType>(FTy->getParamType(i)) &&
+ PoolsToCompress.count(CG.getNodeForValue(AI).getNode())) {
+ // Compressed pool, pass an index.
+ ParamTypes.push_back(SCALARUINTTYPE);
+ } else {
+ ParamTypes.push_back(FTy->getParamType(i));
+ }
+ FunctionType *CFTy = FunctionType::get(RetTy, ParamTypes, FTy->isVarArg());
+
+ // Next, create the clone prototype and insert it into the module.
+ Clone = new Function(CFTy, GlobalValue::InternalLinkage,
+ F->getName()+".pc");
+ F->getParent()->getFunctionList().insert(F, Clone);
+
+ // Remember where this clone came from.
+ FunctionCloneRecord &CFI =
+ ClonedFunctionInfoMap.insert(std::make_pair(Clone, F)).first->second;
+
+ ++NumCloned;
+ std::cerr << " CLONING FUNCTION: " << F->getName() << " -> "
+ << Clone->getName() << "\n";
+
+ if (F->isExternal()) {
+ Clone->setLinkage(GlobalValue::ExternalLinkage);
+ return Clone;
+ }
+
+ std::map<const Value*, Value*> ValueMap;
+
+ // Create dummy Value*'s of pointer type for any arguments that are
+ // compressed. These are needed to satisfy typing constraints before the
+ // function body has been rewritten.
+ std::vector<std::pair<Value*,Value*> > RemappedArgs;
+
+ // Process arguments, setting up the ValueMap for them.
+ Function::arg_iterator CI = Clone->arg_begin();// Iterate over cloned fn args.
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
+ I != E; ++I, ++CI) {
+ // Transfer the argument names over.
+ CI->setName(I->getName());
+
+ // If we are compressing this argument, set up RemappedArgs.
+ if (CI->getType() != I->getType()) {
+ // Create a useless value* that is only needed to hold the uselist for the
+ // argument.
+ Value *V = new Argument(I->getType()); // dummy argument
+ RemappedArgs.push_back(std::make_pair(V, CI));
+ ValueMap[I] = V;
+ } else {
+ // Otherwise, just remember the mapping.
+ ValueMap[I] = CI;
+ }
+ }
+
+ // Clone the actual function body over.
+ std::vector<ReturnInst*> Returns;
+ CloneFunctionInto(Clone, F, ValueMap, Returns);
+ Returns.clear(); // Don't need this.
+
+ // Invert the ValueMap into the NewToOldValueMap
+ std::map<Value*, const Value*> &NewToOldValueMap = CFI.NewToOldValueMap;
+ for (std::map<const Value*, Value*>::iterator I = ValueMap.begin(),
+ E = ValueMap.end(); I != E; ++I)
+ NewToOldValueMap.insert(std::make_pair(I->second, I->first));
+
+ // Compute the PoolDescriptors map for the cloned function.
+ for (std::map<const DSNode*, Value*>::iterator I =
+ FI.PoolDescriptors.begin(), E = FI.PoolDescriptors.end();
+ I != E; ++I)
+ CFI.PoolDescriptors[I->first] = ValueMap[I->second];
+
+ ValueMap.clear();
+
+ // Recursively transform the function.
+ CompressPoolsInFunction(*Clone, &RemappedArgs, &PoolsToCompress);
+ return Clone;
+}
+
+
+// Handle all pools pointed to by global variables.
+void PointerCompress::HandleGlobalPools(Module &M) {
+ if (PoolAlloc->GlobalNodes.empty()) return;
+
+ DEBUG(std::cerr << "Inspecting global nodes:\n");
+
+ // Loop over all of the global nodes identified by the pool allocator.
+ for (std::map<const DSNode*, Value*>::iterator I =
+ PoolAlloc->GlobalNodes.begin(), E = PoolAlloc->GlobalNodes.end();
+ I != E; ++I) {
+ const DSNode *N = I->first;
+
+ // Ignore potential pools that the pool allocation heuristic decided not to
+    // pool allocate.
+ if (!isa<ConstantPointerNull>(I->second))
+ if (PoolIsCompressible(N)) {
+ CompressedGlobalPools.insert(std::make_pair(N,
+ cast<GlobalValue>(I->second)));
+ ++NumCompressed;
+ } else {
+ DEBUG(std::cerr << "PCF: "; N->dump());
+ ++NumNotCompressed;
+ }
+ }
+}
+
+
+/// InitializePoolLibraryFunctions - Create the function prototypes for pointer
+/// compress runtime library functions.
+void PointerCompress::InitializePoolLibraryFunctions(Module &M) {
+ const Type *VoidPtrTy = PointerType::get(Type::SByteTy);
+ const Type *PoolDescPtrTy = PointerType::get(ArrayType::get(VoidPtrTy, 16));
+
+ PoolInitPC = M.getOrInsertFunction("poolinit_pc", VoidPtrTy, PoolDescPtrTy,
+ Type::UIntTy, Type::UIntTy, 0);
+ PoolDestroyPC = M.getOrInsertFunction("pooldestroy_pc", Type::VoidTy,
+ PoolDescPtrTy, 0);
+ PoolAllocPC = M.getOrInsertFunction("poolalloc_pc", SCALARUINTTYPE,
+ PoolDescPtrTy, Type::UIntTy, 0);
+ // FIXME: Need bumppointer versions as well as realloc??/memalign??
+}
+
+bool PointerCompress::runOnModule(Module &M) {
+ PoolAlloc = &getAnalysis<PoolAllocatePassAllPools>();
+ ECG = &getAnalysis<EquivClassGraphs>();
+
+ if (SmallIntCompress)
+ MEMUINTTYPE = Type::UShortTy;
+ else
+ MEMUINTTYPE = Type::UIntTy;
+
+ // FIXME: make this IntPtrTy.
+ SCALARUINTTYPE = Type::ULongTy;
+
+ // Create the function prototypes for pointer compress runtime library
+ // functions.
+ InitializePoolLibraryFunctions(M);
+
+ // Handle all pools pointed to by global variables.
+ HandleGlobalPools(M);
+
+ std::set<Function*> TransformedFns;
+
+ // Iterate over all functions in the module, looking for compressible data
+ // structures.
+ bool Changed = false;
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ // If this function is not a pointer-compressed or pool allocated clone,
+ // compress any pools in it now.
+ if (I->hasExternalLinkage()) {
+ Changed |= CompressPoolsInFunction(*I);
+ TransformedFns.insert(I);
+ }
+
+  // If compressing external functions (e.g. main) required other function
+  // bodies that do not take pool arguments to be compressed, handle them now.
+ for (unsigned i = 0; i != NoArgFunctionsCalled.size(); ++i)
+ if (TransformedFns.insert(NoArgFunctionsCalled[i]).second)
+ Changed |= CompressPoolsInFunction(*NoArgFunctionsCalled[i]);
+
+ NoArgFunctionsCalled.clear();
+ ClonedFunctionMap.clear();
+ return Changed;
+}
Index: llvm-poolalloc/lib/PoolAllocate/PoolAllocate.cpp
diff -u /dev/null llvm-poolalloc/lib/PoolAllocate/PoolAllocate.cpp:1.120
--- /dev/null Wed May 18 14:56:44 2005
+++ llvm-poolalloc/lib/PoolAllocate/PoolAllocate.cpp Wed May 18 14:56:28 2005
@@ -0,0 +1,870 @@
+//===-- PoolAllocate.cpp - Pool Allocation Pass ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transform changes programs so that disjoint data structures are
+// allocated out of different pools of memory, increasing locality.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "poolalloc"
+#include "PoolAllocate.h"
+#include "Heuristic.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Constants.h"
+#include "llvm/Analysis/DataStructure/DataStructure.h"
+#include "llvm/Analysis/DataStructure/DSGraph.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Timer.h"
+using namespace llvm;
+using namespace PA;
+
+const Type *PoolAllocate::PoolDescPtrTy = 0;
+
+#if 0
+#define TIME_REGION(VARNAME, DESC) \
+ NamedRegionTimer VARNAME(DESC)
+#else
+#define TIME_REGION(VARNAME, DESC)
+#endif
+
+namespace {
+ RegisterOpt<PoolAllocate>
+ X("poolalloc", "Pool allocate disjoint data structures");
+ RegisterOpt<PoolAllocatePassAllPools>
+ Y("poolalloc-passing-all-pools", "Pool allocate disjoint data structures");
+
+ Statistic<> NumArgsAdded("poolalloc", "Number of function arguments added");
+ Statistic<> MaxArgsAdded("poolalloc", "Maximum function arguments added to one function");
+ Statistic<> NumCloned ("poolalloc", "Number of functions cloned");
+ Statistic<> NumPools ("poolalloc", "Number of pools allocated");
+ Statistic<> NumTSPools ("poolalloc", "Number of typesafe pools");
+ Statistic<> NumPoolFree ("poolalloc", "Number of poolfree's elided");
+ Statistic<> NumNonprofit("poolalloc", "Number of DSNodes not profitable");
+ Statistic<> NumColocated("poolalloc", "Number of DSNodes colocated");
+
+ const Type *VoidPtrTy;
+
+ // The type to allocate for a pool descriptor.
+ const Type *PoolDescType;
+
+ cl::opt<bool>
+ DisableInitDestroyOpt("poolalloc-force-simple-pool-init",
+ cl::desc("Always insert poolinit/pooldestroy calls at start and exit of functions"));//, cl::init(true));
+ cl::opt<bool>
+ DisablePoolFreeOpt("poolalloc-force-all-poolfrees",
+ cl::desc("Do not try to elide poolfree's where possible"));
+}
+
+void PoolAllocate::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<EquivClassGraphs>();
+ AU.addPreserved<EquivClassGraphs>();
+ AU.addRequired<TargetData>();
+}
+
+bool PoolAllocate::runOnModule(Module &M) {
+ if (M.begin() == M.end()) return false;
+ CurModule = &M;
+ ECGraphs = &getAnalysis<EquivClassGraphs>(); // folded inlined CBU graphs
+
+ CurHeuristic = Heuristic::create();
+ CurHeuristic->Initialize(M, ECGraphs->getGlobalsGraph(), *this);
+
+ // Add the pool* prototypes to the module
+ AddPoolPrototypes();
+
+ // Create the pools for memory objects reachable by global variables.
+ if (SetupGlobalPools(M))
+ return true;
+
+ // Loop over the functions in the original program finding the pool desc.
+ // arguments necessary for each function that is indirectly callable.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isExternal() && ECGraphs->ContainsDSGraphFor(*I))
+ FindFunctionPoolArgs(*I);
+
+ std::map<Function*, Function*> FuncMap;
+
+ // Now clone a function using the pool arg list obtained in the previous pass
+ // over the modules. Loop over only the function initially in the program,
+ // don't traverse newly added ones. If the function needs new arguments, make
+ // its clone.
+ std::set<Function*> ClonedFunctions;
+{TIME_REGION(X, "MakeFunctionClone");
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isExternal() && !ClonedFunctions.count(I) &&
+ ECGraphs->ContainsDSGraphFor(*I))
+ if (Function *Clone = MakeFunctionClone(*I)) {
+ FuncMap[I] = Clone;
+ ClonedFunctions.insert(Clone);
+ }
+}
+
+ // Now that all call targets are available, rewrite the function bodies of the
+ // clones.
+{TIME_REGION(X, "ProcessFunctionBody");
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (!I->isExternal() && !ClonedFunctions.count(I) &&
+ ECGraphs->ContainsDSGraphFor(*I)) {
+ std::map<Function*, Function*>::iterator FI = FuncMap.find(I);
+ ProcessFunctionBody(*I, FI != FuncMap.end() ? *FI->second : *I);
+ }
+}
+ // Replace all uses of original functions with the transformed function.
+ for (std::map<Function *, Function *>::iterator I = FuncMap.begin(),
+ E = FuncMap.end(); I != E; ++I) {
+ Function *F = I->first;
+ F->replaceAllUsesWith(ConstantExpr::getCast(I->second, F->getType()));
+ }
+
+ if (CurHeuristic->IsRealHeuristic())
+ MicroOptimizePoolCalls();
+
+ delete CurHeuristic;
+ return true;
+}
+
+// AddPoolPrototypes - Add prototypes for the pool functions to the specified
+// module and update the Pool* instance variables to point to them.
+//
+// NOTE: If these are changed, make sure to update PoolOptimize.cpp as well!
+//
+void PoolAllocate::AddPoolPrototypes() {
+ if (VoidPtrTy == 0) {
+ // NOTE: If these are changed, make sure to update PoolOptimize.cpp as well!
+ VoidPtrTy = PointerType::get(Type::SByteTy);
+ PoolDescType = ArrayType::get(VoidPtrTy, 16);
+ PoolDescPtrTy = PointerType::get(PoolDescType);
+ }
+
+ CurModule->addTypeName("PoolDescriptor", PoolDescType);
+
+ // Get poolinit function.
+ PoolInit = CurModule->getOrInsertFunction("poolinit", Type::VoidTy,
+ PoolDescPtrTy, Type::UIntTy,
+ Type::UIntTy, 0);
+
+ // Get pooldestroy function.
+ PoolDestroy = CurModule->getOrInsertFunction("pooldestroy", Type::VoidTy,
+ PoolDescPtrTy, 0);
+
+ // The poolalloc function.
+ PoolAlloc = CurModule->getOrInsertFunction("poolalloc",
+ VoidPtrTy, PoolDescPtrTy,
+ Type::UIntTy, 0);
+
+ // The poolrealloc function.
+ PoolRealloc = CurModule->getOrInsertFunction("poolrealloc",
+ VoidPtrTy, PoolDescPtrTy,
+ VoidPtrTy, Type::UIntTy, 0);
+ // The poolmemalign function.
+ PoolMemAlign = CurModule->getOrInsertFunction("poolmemalign",
+ VoidPtrTy, PoolDescPtrTy,
+ Type::UIntTy, Type::UIntTy, 0);
+
+ // Get the poolfree function.
+ PoolFree = CurModule->getOrInsertFunction("poolfree", Type::VoidTy,
+ PoolDescPtrTy, VoidPtrTy, 0);
+}
+
+static void getCallsOf(Function *F, std::vector<CallInst*> &Calls) {
+ Calls.clear();
+ for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E; ++UI)
+ Calls.push_back(cast<CallInst>(*UI));
+}
+
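+// OptimizePointerNotNull - Given a value known to be non-null (such as the
+// result of poolalloc), fold comparisons of it against null into constants and
+// propagate the non-null fact through casts and GEPs of the pointer.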
+static void OptimizePointerNotNull(Value *V) {
+ for (Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) {
+ Instruction *User = cast<Instruction>(*I);
+ if (User->getOpcode() == Instruction::SetEQ ||
+ User->getOpcode() == Instruction::SetNE) {
+ if (isa<Constant>(User->getOperand(1)) &&
+ cast<Constant>(User->getOperand(1))->isNullValue()) {
+ bool CondIsTrue = User->getOpcode() == Instruction::SetNE;
+ User->replaceAllUsesWith(ConstantBool::get(CondIsTrue));
+ }
+ } else if (User->getOpcode() == Instruction::Cast) {
+ // Casted pointers are also not null.
+ if (isa<PointerType>(User->getType()))
+ OptimizePointerNotNull(User);
+ } else if (User->getOpcode() == Instruction::GetElementPtr) {
+ // GEP'd pointers are also not null.
+ OptimizePointerNotNull(User);
+ }
+ }
+}
+
+/// MicroOptimizePoolCalls - Apply any micro-optimizations to pool allocation
+/// function calls that we can.  This runs after the whole program
+/// has been transformed.
+void PoolAllocate::MicroOptimizePoolCalls() {
+ // Optimize poolalloc
+ std::vector<CallInst*> Calls;
+ getCallsOf(PoolAlloc, Calls);
+ for (unsigned i = 0, e = Calls.size(); i != e; ++i) {
+ CallInst *CI = Calls[i];
+ // poolalloc never returns null. Loop over all uses of the call looking for
+ // set(eq|ne) X, null.
+ OptimizePointerNotNull(CI);
+ }
+
+ // TODO: poolfree accepts a null pointer, so remove any check above it, like
+ // 'if (P) poolfree(P)'
+}
+
+
+
+
+static void GetNodesReachableFromGlobals(DSGraph &G,
+ hash_set<const DSNode*> &NodesFromGlobals) {
+ for (DSScalarMap::global_iterator I = G.getScalarMap().global_begin(),
+ E = G.getScalarMap().global_end(); I != E; ++I)
+ G.getNodeForValue(*I).getNode()->markReachableNodes(NodesFromGlobals);
+}
+
+static void MarkNodesWhichMustBePassedIn(hash_set<const DSNode*> &MarkedNodes,
+ Function &F, DSGraph &G,
+ bool PassAllArguments) {
+ // Mark globals and incomplete nodes as live... (this handles arguments)
+ if (F.getName() != "main") {
+ // All DSNodes reachable from arguments must be passed in.
+ for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
+ I != E; ++I) {
+ DSGraph::ScalarMapTy::iterator AI = G.getScalarMap().find(I);
+ if (AI != G.getScalarMap().end())
+ if (DSNode *N = AI->second.getNode())
+ N->markReachableNodes(MarkedNodes);
+ }
+ }
+
+  // Mark the returned node as needing to be passed in.
+ if (DSNode *RetNode = G.getReturnNodeFor(F).getNode())
+ RetNode->markReachableNodes(MarkedNodes);
+
+ // Calculate which DSNodes are reachable from globals. If a node is reachable
+ // from a global, we will create a global pool for it, so no argument passage
+ // is required.
+ hash_set<const DSNode*> NodesFromGlobals;
+ GetNodesReachableFromGlobals(G, NodesFromGlobals);
+
+ // Remove any nodes reachable from a global. These nodes will be put into
+ // global pools, which do not require arguments to be passed in. Also, erase
+ // any marked node that is not a heap node. Since no allocations or frees
+ // will be done with it, it needs no argument.
+ for (hash_set<const DSNode*>::iterator I = MarkedNodes.begin(),
+ E = MarkedNodes.end(); I != E; ) {
+ const DSNode *N = *I++;
+ if ((!N->isHeapNode() && !PassAllArguments) || NodesFromGlobals.count(N))
+ MarkedNodes.erase(N);
+ }
+}
+
+
+/// FindFunctionPoolArgs - In the first pass over the program, we decide which
+/// arguments will have to be added for each function, building the FunctionInfo
+/// map and recording this info in the ArgNodes set.
+void PoolAllocate::FindFunctionPoolArgs(Function &F) {
+ DSGraph &G = ECGraphs->getDSGraph(F);
+
+ // Create a new entry for F.
+ FuncInfo &FI =
+ FunctionInfo.insert(std::make_pair(&F, FuncInfo(F))).first->second;
+ hash_set<const DSNode*> &MarkedNodes = FI.MarkedNodes;
+
+ if (G.node_begin() == G.node_end())
+ return; // No memory activity, nothing is required
+
+ // Find DataStructure nodes which are allocated in pools non-local to the
+ // current function. This set will contain all of the DSNodes which require
+ // pools to be passed in from outside of the function.
+ MarkNodesWhichMustBePassedIn(MarkedNodes, F, G, PassAllArguments);
+
+ FI.ArgNodes.insert(FI.ArgNodes.end(), MarkedNodes.begin(), MarkedNodes.end());
+}
+
+// MakeFunctionClone - If the specified function needs to be modified for pool
+// allocation support, make a clone of it, adding additional arguments as
+// necessary, and return it. If not, just return null.
+//
+Function *PoolAllocate::MakeFunctionClone(Function &F) {
+ DSGraph &G = ECGraphs->getDSGraph(F);
+ if (G.node_begin() == G.node_end()) return 0;
+
+ FuncInfo &FI = *getFuncInfo(F);
+ if (FI.ArgNodes.empty())
+ return 0; // No need to clone if no pools need to be passed in!
+
+ // Update statistics..
+ NumArgsAdded += FI.ArgNodes.size();
+ if (MaxArgsAdded < FI.ArgNodes.size()) MaxArgsAdded = FI.ArgNodes.size();
+ ++NumCloned;
+
+
+ // Figure out what the arguments are to be for the new version of the function
+ const FunctionType *OldFuncTy = F.getFunctionType();
+ std::vector<const Type*> ArgTys(FI.ArgNodes.size(), PoolDescPtrTy);
+ ArgTys.reserve(OldFuncTy->getNumParams() + FI.ArgNodes.size());
+
+ ArgTys.insert(ArgTys.end(), OldFuncTy->param_begin(), OldFuncTy->param_end());
+
+ // Create the new function prototype
+ FunctionType *FuncTy = FunctionType::get(OldFuncTy->getReturnType(), ArgTys,
+ OldFuncTy->isVarArg());
+ // Create the new function...
+ Function *New = new Function(FuncTy, Function::InternalLinkage, F.getName());
+ F.getParent()->getFunctionList().insert(&F, New);
+ CloneToOrigMap[New] = &F; // Remember original function.
+
+  // Set the rest of the new arguments' names to be PDa<n> and add entries to
+  // the pool descriptors map.
+ std::map<const DSNode*, Value*> &PoolDescriptors = FI.PoolDescriptors;
+ Function::arg_iterator NI = New->arg_begin();
+
+ for (unsigned i = 0, e = FI.ArgNodes.size(); i != e; ++i, ++NI) {
+ NI->setName("PDa");
+ PoolDescriptors[FI.ArgNodes[i]] = NI;
+ }
+
+ // Map the existing arguments of the old function to the corresponding
+ // arguments of the new function, and copy over the names.
+ std::map<const Value*, Value*> ValueMap;
+ for (Function::arg_iterator I = F.arg_begin();
+ NI != New->arg_end(); ++I, ++NI) {
+ ValueMap[I] = NI;
+ NI->setName(I->getName());
+ }
+
+ // Perform the cloning.
+ std::vector<ReturnInst*> Returns;
+{TIME_REGION(X, "CloneFunctionInto");
+ CloneFunctionInto(New, &F, ValueMap, Returns);
+}
+ // Invert the ValueMap into the NewToOldValueMap
+ std::map<Value*, const Value*> &NewToOldValueMap = FI.NewToOldValueMap;
+
+ for (std::map<const Value*, Value*>::iterator I = ValueMap.begin(),
+ E = ValueMap.end(); I != E; ++I)
+ NewToOldValueMap.insert(std::make_pair(I->second, I->first));
+ return FI.Clone = New;
+}
+
+// SetupGlobalPools - Create global pools for all DSNodes in the globals graph
+// which contain heap objects. If a global variable points to a piece of memory
+// allocated from the heap, this pool gets a global lifetime. This is
+// implemented by making the pool descriptor be a global variable of its own,
+// and initializing the pool on entrance to main. Note that we never destroy
+// the pool, because it has global lifetime.
+//
+// This method returns true if correct pool allocation of the module cannot be
+// performed because there is no main function for the module and there are
+// global pools.
+//
+bool PoolAllocate::SetupGlobalPools(Module &M) {
+ // Get the globals graph for the program.
+ DSGraph &GG = ECGraphs->getGlobalsGraph();
+
+ // Get all of the nodes reachable from globals.
+ hash_set<const DSNode*> GlobalHeapNodes;
+ GetNodesReachableFromGlobals(GG, GlobalHeapNodes);
+
+ // Filter out all nodes which have no heap allocations merged into them.
+ for (hash_set<const DSNode*>::iterator I = GlobalHeapNodes.begin(),
+ E = GlobalHeapNodes.end(); I != E; ) {
+ hash_set<const DSNode*>::iterator Last = I++;
+ if (!(*Last)->isHeapNode())
+ GlobalHeapNodes.erase(Last);
+ }
+
+  // Get the main function so we can insert the poolinit calls.
+ Function *MainFunc = M.getMainFunction();
+ if (MainFunc == 0 || MainFunc->isExternal()) {
+ std::cerr << "Cannot pool allocate this program: it has global "
+ << "pools but no 'main' function yet!\n";
+ return true;
+ }
+
+ std::cerr << "Pool allocating " << GlobalHeapNodes.size()
+ << " global nodes!\n";
+
+
+ std::vector<const DSNode*> NodesToPA(GlobalHeapNodes.begin(),
+ GlobalHeapNodes.end());
+ std::vector<Heuristic::OnePool> ResultPools;
+ CurHeuristic->AssignToPools(NodesToPA, 0, GG, ResultPools);
+
+ BasicBlock::iterator InsertPt = MainFunc->getEntryBlock().begin();
+ while (isa<AllocaInst>(InsertPt)) ++InsertPt;
+
+ // Perform all global assignments as specified.
+ for (unsigned i = 0, e = ResultPools.size(); i != e; ++i) {
+ Heuristic::OnePool &Pool = ResultPools[i];
+ Value *PoolDesc = Pool.PoolDesc;
+ if (PoolDesc == 0) {
+ PoolDesc = CreateGlobalPool(Pool.PoolSize, Pool.PoolAlignment, InsertPt);
+
+ if (Pool.NodesInPool.size() == 1 &&
+ !Pool.NodesInPool[0]->isNodeCompletelyFolded())
+ ++NumTSPools;
+ }
+ for (unsigned N = 0, e = Pool.NodesInPool.size(); N != e; ++N) {
+ GlobalNodes[Pool.NodesInPool[N]] = PoolDesc;
+ GlobalHeapNodes.erase(Pool.NodesInPool[N]); // Handled!
+ }
+ }
+
+ // Any unallocated DSNodes get null pool descriptor pointers.
+ for (hash_set<const DSNode*>::iterator I = GlobalHeapNodes.begin(),
+ E = GlobalHeapNodes.end(); I != E; ++I) {
+ GlobalNodes[*I] = Constant::getNullValue(PointerType::get(PoolDescType));
+ ++NumNonprofit;
+ }
+
+ return false;
+}
+
+/// CreateGlobalPool - Create a global pool descriptor object, and insert a
+/// poolinit for it into main. IPHint is an instruction that we should insert
+/// the poolinit before if not null.
+GlobalVariable *PoolAllocate::CreateGlobalPool(unsigned RecSize, unsigned Align,
+ Instruction *IPHint) {
+ GlobalVariable *GV =
+ new GlobalVariable(PoolDescType, false, GlobalValue::InternalLinkage,
+ Constant::getNullValue(PoolDescType), "GlobalPool",
+ CurModule);
+
+ // Update the global DSGraph to include this.
+ DSNode *GNode = ECGraphs->getGlobalsGraph().addObjectToGraph(GV);
+ GNode->setModifiedMarker()->setReadMarker();
+
+ Function *MainFunc = CurModule->getMainFunction();
+ assert(MainFunc && "No main in program??");
+
+ BasicBlock::iterator InsertPt;
+ if (IPHint)
+ InsertPt = IPHint;
+ else {
+ InsertPt = MainFunc->getEntryBlock().begin();
+ while (isa<AllocaInst>(InsertPt)) ++InsertPt;
+ }
+
+ Value *ElSize = ConstantUInt::get(Type::UIntTy, RecSize);
+ Value *AlignV = ConstantUInt::get(Type::UIntTy, Align);
+ new CallInst(PoolInit, make_vector((Value*)GV, ElSize, AlignV, 0),
+ "", InsertPt);
+ ++NumPools;
+ return GV;
+}
+
+
+// CreatePools - This creates the pool initialization and destruction code for
+// the DSNodes specified by the NodesToPA list. This adds an entry to the
+// PoolDescriptors map for each DSNode.
+//
+void PoolAllocate::CreatePools(Function &F, DSGraph &DSG,
+ const std::vector<const DSNode*> &NodesToPA,
+ std::map<const DSNode*,
+ Value*> &PoolDescriptors) {
+ if (NodesToPA.empty()) return;
+ TIME_REGION(X, "CreatePools");
+
+ std::vector<Heuristic::OnePool> ResultPools;
+ CurHeuristic->AssignToPools(NodesToPA, &F, *NodesToPA[0]->getParentGraph(),
+ ResultPools);
+
+ std::set<const DSNode*> UnallocatedNodes(NodesToPA.begin(), NodesToPA.end());
+
+ BasicBlock::iterator InsertPoint = F.front().begin();
+ while (isa<AllocaInst>(InsertPoint)) ++InsertPoint;
+
+ // Is this main? If so, make the pool descriptors globals, not automatic
+ // vars.
+ bool IsMain = F.getName() == "main" && F.hasExternalLinkage();
+
+ // Perform all global assignments as specified.
+ for (unsigned i = 0, e = ResultPools.size(); i != e; ++i) {
+ Heuristic::OnePool &Pool = ResultPools[i];
+ Value *PoolDesc = Pool.PoolDesc;
+ if (PoolDesc == 0) {
+ // Create a pool descriptor for the pool. The poolinit will be inserted
+ // later.
+ if (!IsMain) {
+ PoolDesc = new AllocaInst(PoolDescType, 0, "PD", InsertPoint);
+
+ // Create a node in DSG to represent the new alloca.
+ DSNode *NewNode = DSG.addObjectToGraph(PoolDesc);
+ NewNode->setModifiedMarker()->setReadMarker(); // This is M/R
+ } else {
+ PoolDesc = CreateGlobalPool(Pool.PoolSize, Pool.PoolAlignment,
+ InsertPoint);
+
+ // Add the global node to main's graph.
+ DSNode *NewNode = DSG.addObjectToGraph(PoolDesc);
+ NewNode->setModifiedMarker()->setReadMarker(); // This is M/R
+
+ if (Pool.NodesInPool.size() == 1 &&
+ !Pool.NodesInPool[0]->isNodeCompletelyFolded())
+ ++NumTSPools;
+ }
+ }
+ for (unsigned N = 0, e = Pool.NodesInPool.size(); N != e; ++N) {
+ PoolDescriptors[Pool.NodesInPool[N]] = PoolDesc;
+ UnallocatedNodes.erase(Pool.NodesInPool[N]); // Handled!
+ }
+ }
+
+ // Any unallocated DSNodes get null pool descriptor pointers.
+ for (std::set<const DSNode*>::iterator I = UnallocatedNodes.begin(),
+ E = UnallocatedNodes.end(); I != E; ++I) {
+ PoolDescriptors[*I] =Constant::getNullValue(PointerType::get(PoolDescType));
+ ++NumNonprofit;
+ }
+}
+
+// ProcessFunctionBody - Pool allocate any data structures which are contained
+// in the specified function.
+//
+void PoolAllocate::ProcessFunctionBody(Function &F, Function &NewF) {
+ DSGraph &G = ECGraphs->getDSGraph(F);
+
+ if (G.node_begin() == G.node_end()) return; // Quick exit if nothing to do.
+
+ FuncInfo &FI = *getFuncInfo(F);
+ hash_set<const DSNode*> &MarkedNodes = FI.MarkedNodes;
+
+ // Calculate which DSNodes are reachable from globals. If a node is reachable
+ // from a global, we will create a global pool for it, so no argument passage
+ // is required.
+ DSGraph &GG = ECGraphs->getGlobalsGraph();
+
+  // Map the nodes in this graph to the corresponding nodes in the globals
+  // graph.
+ DSGraph::NodeMapTy GlobalsGraphNodeMapping;
+ G.computeGToGGMapping(GlobalsGraphNodeMapping);
+
+ // Loop over all of the nodes which are non-escaping, adding pool-allocatable
+ // ones to the NodesToPA vector.
+ for (DSGraph::node_iterator I = G.node_begin(), E = G.node_end(); I != E;++I){
+ // We only need to make a pool if there is a heap object in it...
+ DSNode *N = I;
+ if (N->isHeapNode())
+ if (GlobalsGraphNodeMapping.count(N)) {
+ // If it is a global pool, set up the pool descriptor appropriately.
+ DSNode *GGN = GlobalsGraphNodeMapping[N].getNode();
+ assert(GGN && GlobalNodes[GGN] && "No global node found??");
+ FI.PoolDescriptors[N] = GlobalNodes[GGN];
+ } else if (!MarkedNodes.count(N)) {
+ // Otherwise, if it was not passed in from outside the function, it must
+ // be a local pool!
+ assert(!N->isGlobalNode() && "Should be in global mapping!");
+ FI.NodesToPA.push_back(N);
+ }
+ }
+
+ if (!FI.NodesToPA.empty()) {
+ std::cerr << "[" << F.getName() << "] " << FI.NodesToPA.size()
+ << " nodes pool allocatable\n";
+ CreatePools(NewF, G, FI.NodesToPA, FI.PoolDescriptors);
+ } else {
+ DEBUG(std::cerr << "[" << F.getName() << "] transforming body.\n");
+ }
+
+ // Transform the body of the function now... collecting information about uses
+ // of the pools.
+ std::multimap<AllocaInst*, Instruction*> PoolUses;
+ std::multimap<AllocaInst*, CallInst*> PoolFrees;
+ TransformBody(G, FI, PoolUses, PoolFrees, NewF);
+
+ // Create pool construction/destruction code
+ if (!FI.NodesToPA.empty())
+ InitializeAndDestroyPools(NewF, FI.NodesToPA, FI.PoolDescriptors,
+ PoolUses, PoolFrees);
+ CurHeuristic->HackFunctionBody(NewF, FI.PoolDescriptors);
+}
+
+template<class IteratorTy>
+static void AllOrNoneInSet(IteratorTy S, IteratorTy E,
+ std::set<BasicBlock*> &Blocks, bool &AllIn,
+ bool &NoneIn) {
+ AllIn = true;
+ NoneIn = true;
+ for (; S != E; ++S)
+ if (Blocks.count(*S))
+ NoneIn = false;
+ else
+ AllIn = false;
+}
+
+static void DeleteIfIsPoolFree(Instruction *I, AllocaInst *PD,
+ std::multimap<AllocaInst*, CallInst*> &PoolFrees) {
+ std::multimap<AllocaInst*, CallInst*>::iterator PFI, PFE;
+  if (isa<CallInst>(I))
+ for (tie(PFI,PFE) = PoolFrees.equal_range(PD); PFI != PFE; ++PFI)
+ if (PFI->second == I) {
+ PoolFrees.erase(PFI);
+ I->eraseFromParent();
+ ++NumPoolFree;
+ return;
+ }
+}
+
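+/// CalculateLivePoolFreeBlocks - Compute the set of blocks from which some use
+/// of the pool other than poolfree/pooldestroy (e.g. a poolalloc) is still
+/// reachable; poolfree calls outside this set may be elided.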
+void PoolAllocate::CalculateLivePoolFreeBlocks(std::set<BasicBlock*>&LiveBlocks,
+ Value *PD) {
+ for (Value::use_iterator I = PD->use_begin(), E = PD->use_end(); I != E; ++I){
+ // The only users of the pool should be call & invoke instructions.
+ CallSite U = CallSite::get(*I);
+ if (U.getCalledValue() != PoolFree && U.getCalledValue() != PoolDestroy) {
+ // This block and every block that can reach this block must keep pool
+ // frees.
+ for (idf_ext_iterator<BasicBlock*, std::set<BasicBlock*> >
+ DI = idf_ext_begin(U.getInstruction()->getParent(), LiveBlocks),
+ DE = idf_ext_end(U.getInstruction()->getParent(), LiveBlocks);
+ DI != DE; ++DI)
+ /* empty */;
+ }
+ }
+}
+
+/// InitializeAndDestroyPool - This inserts calls to poolinit and pooldestroy
+/// into the function to initialize and destroy one pool.
+///
+void PoolAllocate::InitializeAndDestroyPool(Function &F, const DSNode *Node,
+ std::map<const DSNode*, Value*> &PoolDescriptors,
+ std::multimap<AllocaInst*, Instruction*> &PoolUses,
+ std::multimap<AllocaInst*, CallInst*> &PoolFrees) {
+ AllocaInst *PD = cast<AllocaInst>(PoolDescriptors[Node]);
+
+ // Convert the PoolUses/PoolFrees sets into something specific to this pool: a
+ // set of which blocks are immediately using the pool.
+ std::set<BasicBlock*> UsingBlocks;
+
+ std::multimap<AllocaInst*, Instruction*>::iterator PUI, PUE;
+ tie(PUI, PUE) = PoolUses.equal_range(PD);
+ for (; PUI != PUE; ++PUI)
+ UsingBlocks.insert(PUI->second->getParent());
+
+ // To calculate all of the basic blocks which require the pool to be
+ // initialized before, do a depth first search on the CFG from the using
+ // blocks.
+ std::set<BasicBlock*> InitializedBefore;
+ std::set<BasicBlock*> DestroyedAfter;
+ for (std::set<BasicBlock*>::iterator I = UsingBlocks.begin(),
+ E = UsingBlocks.end(); I != E; ++I) {
+ for (df_ext_iterator<BasicBlock*, std::set<BasicBlock*> >
+ DI = df_ext_begin(*I, InitializedBefore),
+ DE = df_ext_end(*I, InitializedBefore); DI != DE; ++DI)
+ /* empty */;
+
+ for (idf_ext_iterator<BasicBlock*, std::set<BasicBlock*> >
+ DI = idf_ext_begin(*I, DestroyedAfter),
+ DE = idf_ext_end(*I, DestroyedAfter); DI != DE; ++DI)
+ /* empty */;
+ }
+ // Now that we have created the sets, intersect them.
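+  // A block needs the pool live if it is both reachable from a using block and
+  // able to reach a using block, i.e. it lies on some path from one pool use to
+  // another (the using blocks themselves included).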
+ std::set<BasicBlock*> LiveBlocks;
+ std::set_intersection(InitializedBefore.begin(),InitializedBefore.end(),
+ DestroyedAfter.begin(), DestroyedAfter.end(),
+ std::inserter(LiveBlocks, LiveBlocks.end()));
+ InitializedBefore.clear();
+ DestroyedAfter.clear();
+
+ DEBUG(std::cerr << "POOL: " << PD->getName() << " information:\n");
+ DEBUG(std::cerr << " Live in blocks: ");
+ DEBUG(for (std::set<BasicBlock*>::iterator I = LiveBlocks.begin(),
+ E = LiveBlocks.end(); I != E; ++I)
+ std::cerr << (*I)->getName() << " ");
+ DEBUG(std::cerr << "\n");
+
+
+ std::vector<Instruction*> PoolInitPoints;
+ std::vector<Instruction*> PoolDestroyPoints;
+
+ if (DisableInitDestroyOpt) {
+ // Insert poolinit calls after all of the allocas...
+ Instruction *InsertPoint;
+ for (BasicBlock::iterator I = F.front().begin();
+ isa<AllocaInst>(InsertPoint = I); ++I)
+ /*empty*/;
+ PoolInitPoints.push_back(InsertPoint);
+
+ if (F.getName() != "main")
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (isa<ReturnInst>(BB->getTerminator()) ||
+ isa<UnwindInst>(BB->getTerminator()))
+ PoolDestroyPoints.push_back(BB->getTerminator());
+ } else {
+ // Keep track of the blocks we have inserted poolinit/destroy into.
+ std::set<BasicBlock*> PoolInitInsertedBlocks, PoolDestroyInsertedBlocks;
+
+ for (std::set<BasicBlock*>::iterator I = LiveBlocks.begin(),
+ E = LiveBlocks.end(); I != E; ++I) {
+ BasicBlock *BB = *I;
+ TerminatorInst *Term = BB->getTerminator();
+
+ // Check the predecessors of this block. If any preds are not in the
+ // set, or if there are no preds, insert a pool init.
+ bool AllIn, NoneIn;
+ AllOrNoneInSet(pred_begin(BB), pred_end(BB), LiveBlocks, AllIn,
+ NoneIn);
+
+ if (NoneIn) {
+ if (!PoolInitInsertedBlocks.count(BB)) {
+ BasicBlock::iterator It = BB->begin();
+ while (isa<AllocaInst>(It) || isa<PHINode>(It)) ++It;
+#if 0
+ // Move through all of the instructions not in the pool
+ while (!PoolUses.count(std::make_pair(PD, It)))
+ // Advance past non-users deleting any pool frees that we run
+ // across.
+ DeleteIfIsPoolFree(It++, PD, PoolFrees);
+#endif
+ PoolInitPoints.push_back(It);
+ PoolInitInsertedBlocks.insert(BB);
+ }
+ } else if (!AllIn) {
+ TryAgainPred:
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E;
+ ++PI)
+ if (!LiveBlocks.count(*PI) && !PoolInitInsertedBlocks.count(*PI)){
+ if (SplitCriticalEdge(BB, PI))
+ // If the critical edge was split, *PI was invalidated
+ goto TryAgainPred;
+
+ // Insert at the end of the predecessor, before the terminator.
+ PoolInitPoints.push_back((*PI)->getTerminator());
+ PoolInitInsertedBlocks.insert(*PI);
+ }
+ }
+ // Check the successors of this block. If some succs are not in the
+ // set, insert destroys on those successor edges. If all succs are
+ // not in the set, insert a destroy in this block.
+ AllOrNoneInSet(succ_begin(BB), succ_end(BB), LiveBlocks,
+ AllIn, NoneIn);
+
+ if (NoneIn) {
+ // Insert before the terminator.
+ if (!PoolDestroyInsertedBlocks.count(BB)) {
+ BasicBlock::iterator It = Term;
+
+ // Rewind to the first using instruction.
+#if 0
+ while (!PoolUses.count(std::make_pair(PD, It)))
+ DeleteIfIsPoolFree(It--, PD, PoolFrees);
+ ++It;
+#endif
+
+ // Insert after the first using instruction
+ PoolDestroyPoints.push_back(It);
+ PoolDestroyInsertedBlocks.insert(BB);
+ }
+ } else if (!AllIn) {
+ for (succ_iterator SI = succ_begin(BB), E = succ_end(BB);
+ SI != E; ++SI)
+ if (!LiveBlocks.count(*SI) &&
+ !PoolDestroyInsertedBlocks.count(*SI)) {
+ // If this edge is critical, split it.
+ SplitCriticalEdge(BB, SI);
+
+ // Insert at entry to the successor, but after any PHI nodes.
+ BasicBlock::iterator It = (*SI)->begin();
+ while (isa<PHINode>(It)) ++It;
+ PoolDestroyPoints.push_back(It);
+ PoolDestroyInsertedBlocks.insert(*SI);
+ }
+ }
+ }
+ }
+
+ DEBUG(std::cerr << " Init in blocks: ");
+
+ // Insert the calls to initialize the pool.
+ unsigned ElSizeV = Heuristic::getRecommendedSize(Node);
+ Value *ElSize = ConstantUInt::get(Type::UIntTy, ElSizeV);
+ unsigned AlignV = Heuristic::getRecommendedAlignment(Node);
+ Value *Align = ConstantUInt::get(Type::UIntTy, AlignV);
+
+ for (unsigned i = 0, e = PoolInitPoints.size(); i != e; ++i) {
+ new CallInst(PoolInit, make_vector((Value*)PD, ElSize, Align, 0),
+ "", PoolInitPoints[i]);
+ DEBUG(std::cerr << PoolInitPoints[i]->getParent()->getName() << " ");
+ }
+
+ DEBUG(std::cerr << "\n Destroy in blocks: ");
+
+ // Loop over all of the places to insert pooldestroy's...
+ for (unsigned i = 0, e = PoolDestroyPoints.size(); i != e; ++i) {
+ // Insert the pooldestroy call for this pool.
+ new CallInst(PoolDestroy, make_vector((Value*)PD, 0), "",
+ PoolDestroyPoints[i]);
+ DEBUG(std::cerr << PoolDestroyPoints[i]->getParent()->getName()<<" ");
+ }
+ DEBUG(std::cerr << "\n\n");
+
+ // We are allowed to delete any poolfree's which occur between the last
+ // call to poolalloc, and the call to pooldestroy. Figure out which
+ // basic blocks have this property for this pool.
+ std::set<BasicBlock*> PoolFreeLiveBlocks;
+ if (!DisablePoolFreeOpt)
+ CalculateLivePoolFreeBlocks(PoolFreeLiveBlocks, PD);
+ else
+ PoolFreeLiveBlocks = LiveBlocks;
+
+ // Delete any pool frees which are not in live blocks, for correctness.
+ std::multimap<AllocaInst*, CallInst*>::iterator PFI, PFE;
+ for (tie(PFI,PFE) = PoolFrees.equal_range(PD); PFI != PFE; ) {
+ CallInst *PoolFree = (PFI++)->second;
+ if (!LiveBlocks.count(PoolFree->getParent()) ||
+ !PoolFreeLiveBlocks.count(PoolFree->getParent()))
+ DeleteIfIsPoolFree(PoolFree, PD, PoolFrees);
+ }
+}
+
+
+/// InitializeAndDestroyPools - This inserts calls to poolinit and pooldestroy
+/// into the function to initialize and destroy the pools in the NodesToPA list.
+///
+void PoolAllocate::InitializeAndDestroyPools(Function &F,
+ const std::vector<const DSNode*> &NodesToPA,
+ std::map<const DSNode*, Value*> &PoolDescriptors,
+ std::multimap<AllocaInst*, Instruction*> &PoolUses,
+ std::multimap<AllocaInst*, CallInst*> &PoolFrees) {
+ std::set<AllocaInst*> AllocasHandled;
+
+ // Insert all of the poolinit/destroy calls into the function.
+ for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i) {
+ const DSNode *Node = NodesToPA[i];
+
+ if (isa<GlobalVariable>(PoolDescriptors[Node]) ||
+ isa<ConstantPointerNull>(PoolDescriptors[Node]))
+ continue;
+
+ assert(isa<AllocaInst>(PoolDescriptors[Node]) && "Why pool allocate this?");
+ AllocaInst *PD = cast<AllocaInst>(PoolDescriptors[Node]);
+
+ // FIXME: Turn this into an assert and fix the problem!!
+ //assert(PoolUses.count(PD) && "Pool is not used, but is marked heap?!");
+ if (!PoolUses.count(PD) && !PoolFrees.count(PD)) continue;
+ if (!AllocasHandled.insert(PD).second) continue;
+
+ ++NumPools;
+ if (!Node->isNodeCompletelyFolded())
+ ++NumTSPools;
+
+ InitializeAndDestroyPool(F, Node, PoolDescriptors, PoolUses, PoolFrees);
+ }
+}
Index: llvm-poolalloc/lib/PoolAllocate/PoolAllocate.h
diff -u /dev/null llvm-poolalloc/lib/PoolAllocate/PoolAllocate.h:1.47
--- /dev/null Wed May 18 14:56:44 2005
+++ llvm-poolalloc/lib/PoolAllocate/PoolAllocate.h Wed May 18 14:56:28 2005
@@ -0,0 +1,262 @@
+//===-- PoolAllocate.h - Pool allocation pass -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transform changes programs so that disjoint data structures are
+// allocated out of different pools of memory, increasing locality. This header
+// file exposes information about the pool allocation itself so that follow-on
+// passes may extend or use the pool allocation for analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POOLALLOCATE_H
+#define POOLALLOCATE_H
+
+#include "llvm/Pass.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/ADT/hash_set"
+#include <set>
+
+namespace llvm {
+
+class DSNode;
+class DSGraph;
+class Type;
+class AllocaInst;
+class EquivClassGraphs;
+
+namespace PA {
+
+ class Heuristic;
+
+ /// FuncInfo - Represent the pool allocation information for one function in
+ /// the program. Note that many functions must actually be cloned in order
+ /// for pool allocation to add arguments to the function signature. In this
+ /// case, the Clone and NewToOldValueMap information identify how the clone
+ /// maps to the original function...
+ ///
+ struct FuncInfo {
+ FuncInfo(Function &f) : F(f), Clone(0) {}
+
+ /// MarkedNodes - The set of nodes which are not locally pool allocatable in
+ /// the current function.
+ ///
+ hash_set<const DSNode*> MarkedNodes;
+
+ /// F - The function this FuncInfo corresponds to.
+ ///
+ Function &F;
+
+ /// Clone - The cloned version of the function, if applicable.
+ ///
+ Function *Clone;
+
+ /// ArgNodes - The list of DSNodes which have pools passed in as arguments.
+ ///
+ std::vector<const DSNode*> ArgNodes;
+
+ /// NodesToPA - The list of nodes which are to be pool allocated locally in
+ /// this function. This only includes heap nodes.
+ std::vector<const DSNode*> NodesToPA;
+
+ /// PoolDescriptors - The Value* which defines the pool descriptor for this
+ /// DSNode. Note: This does not necessarily include pool arguments that are
+ /// passed in because of indirect function calls that are not used in the
+ /// function.
+ std::map<const DSNode*, Value*> PoolDescriptors;
+
+ /// NewToOldValueMap - When and if a function needs to be cloned, this map
+ /// contains a mapping from all of the values in the new function back to
+ /// the values they correspond to in the old function.
+ ///
+ typedef std::map<Value*, const Value*> NewToOldValueMapTy;
+ NewToOldValueMapTy NewToOldValueMap;
+
+ /// MapValueToOriginal - Given a value in the cloned version of this
+ /// function, map it back to the original. If the specified value did not
+ /// exist in the original function (e.g. because it's a pool descriptor),
+ /// return null.
+ Value *MapValueToOriginal(Value *V) const {
+ NewToOldValueMapTy::const_iterator I = NewToOldValueMap.find(V);
+ return I != NewToOldValueMap.end() ? const_cast<Value*>(I->second) : 0;
+ }
+ };
+
+}; // end PA namespace
+
+
+
+/// PoolAllocate - The main pool allocation pass
+///
+class PoolAllocate : public ModulePass {
+ /// PassAllArguments - If set to true, we should pass pool descriptor
+ /// arguments into any function that loads or stores to a pool, in addition to
+ /// those functions that allocate or deallocate. See also the
+ /// PoolAllocatePassAllPools pass below.
+ bool PassAllArguments;
+
+ Module *CurModule;
+ EquivClassGraphs *ECGraphs;
+
+ std::map<Function*, PA::FuncInfo> FunctionInfo;
+ std::map<Function*, Function*> CloneToOrigMap;
+public:
+
+ Function *PoolInit, *PoolDestroy, *PoolAlloc, *PoolRealloc, *PoolMemAlign;
+ Function *PoolFree;
+ static const Type *PoolDescPtrTy;
+
+ PA::Heuristic *CurHeuristic;
+
+ /// GlobalNodes - For each node (with an H marker) in the globals graph, this
+ /// map contains the global variable that holds the pool descriptor for the
+ /// node.
+ std::map<const DSNode*, Value*> GlobalNodes;
+
+ public:
+ PoolAllocate(bool passAllArguments = false)
+ : PassAllArguments(passAllArguments) {}
+
+ bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ EquivClassGraphs &getECGraphs() const { return *ECGraphs; }
+
+ /// getOrigFunctionFromClone - Given a pointer to a function that was cloned
+ /// from another function, return the original function. If the argument
+ /// function is not a clone, return null.
+ Function *getOrigFunctionFromClone(Function *F) const {
+ std::map<Function*, Function*>::const_iterator I = CloneToOrigMap.find(F);
+ return I != CloneToOrigMap.end() ? I->second : 0;
+ }
+
+ /// getFuncInfo - Return the FuncInfo object for the specified function.
+ ///
+ PA::FuncInfo *getFuncInfo(Function &F) {
+ std::map<Function*, PA::FuncInfo>::iterator I = FunctionInfo.find(&F);
+ return I != FunctionInfo.end() ? &I->second : 0;
+ }
+
+  /// getFuncInfoOrClone - Return the function info object for the specified
+ /// function. If this function is a clone of another function, return the
+ /// function info object for the original function.
+ PA::FuncInfo *getFuncInfoOrClone(Function &F) {
+    // If this function has pool allocation info of its own, return it.
+ if (PA::FuncInfo *FI = getFuncInfo(F))
+ return FI;
+ // Maybe this is a function clone?
+ if (Function *FC = getOrigFunctionFromClone(&F))
+ return getFuncInfo(*FC);
+ return 0;
+ }
+
+
+ /// releaseMemory - When the pool allocator is no longer used, release
+ /// resources used by it.
+ virtual void releaseMemory() {
+ FunctionInfo.clear();
+ GlobalNodes.clear();
+ CloneToOrigMap.clear();
+ }
+
+
+ Module *getCurModule() { return CurModule; }
+
+ /// CreateGlobalPool - Create a global pool descriptor, initialize it in main,
+ /// and return a pointer to the global for it.
+ GlobalVariable *CreateGlobalPool(unsigned RecSize, unsigned Alignment,
+ Instruction *IPHint = 0);
+
+ private:
+
+ /// AddPoolPrototypes - Add prototypes for the pool functions to the
+ /// specified module and update the Pool* instance variables to point to
+ /// them.
+ ///
+ void AddPoolPrototypes();
+
+  /// MicroOptimizePoolCalls - Apply any micro-optimizations to the pool
+  /// allocation function calls that we can.
+ void MicroOptimizePoolCalls();
+
+ /// BuildIndirectFunctionSets - Iterate over the module looking for indirect
+  /// calls to functions.
+ void BuildIndirectFunctionSets(Module &M);
+
+ /// SetupGlobalPools - Create global pools for all DSNodes in the globals
+ /// graph which contain heap objects. If a global variable points to a piece
+ /// of memory allocated from the heap, this pool gets a global lifetime.
+ ///
+ /// This method returns true if correct pool allocation of the module cannot
+ /// be performed because there is no main function for the module and there
+ /// are global pools.
+ bool SetupGlobalPools(Module &M);
+
+ /// FindFunctionPoolArgs - In the first pass over the program, we decide which
+ /// arguments will have to be added for each function, build the FunctionInfo
+  /// map, and record this info in the ArgNodes set.
+ void FindFunctionPoolArgs(Function &F);
+
+ /// MakeFunctionClone - If the specified function needs to be modified for
+ /// pool allocation support, make a clone of it, adding additional arguments
+  /// as necessary, and return it.  If not, just return null.
+ ///
+ Function *MakeFunctionClone(Function &F);
+
+ /// ProcessFunctionBody - Rewrite the body of a transformed function to use
+ /// pool allocation where appropriate.
+ ///
+ void ProcessFunctionBody(Function &Old, Function &New);
+
+  /// CreatePools - This inserts alloca instructions in the function for all
+ /// pools specified in the NodesToPA list. This adds an entry to the
+ /// PoolDescriptors map for each DSNode.
+ ///
+ void CreatePools(Function &F, DSGraph &G,
+ const std::vector<const DSNode*> &NodesToPA,
+ std::map<const DSNode*, Value*> &PoolDescriptors);
+
+ void TransformBody(DSGraph &g, PA::FuncInfo &fi,
+ std::multimap<AllocaInst*, Instruction*> &poolUses,
+ std::multimap<AllocaInst*, CallInst*> &poolFrees,
+ Function &F);
+
+ /// InitializeAndDestroyPools - This inserts calls to poolinit and pooldestroy
+ /// into the function to initialize and destroy the pools in the NodesToPA
+ /// list.
+ void InitializeAndDestroyPools(Function &F,
+ const std::vector<const DSNode*> &NodesToPA,
+ std::map<const DSNode*, Value*> &PoolDescriptors,
+ std::multimap<AllocaInst*, Instruction*> &PoolUses,
+ std::multimap<AllocaInst*, CallInst*> &PoolFrees);
+
+ void InitializeAndDestroyPool(Function &F, const DSNode *Pool,
+ std::map<const DSNode*, Value*> &PoolDescriptors,
+ std::multimap<AllocaInst*, Instruction*> &PoolUses,
+ std::multimap<AllocaInst*, CallInst*> &PoolFrees);
+
+ void CalculateLivePoolFreeBlocks(std::set<BasicBlock*> &LiveBlocks,Value *PD);
+};
+
+
+/// PoolAllocatePassAllPools - This class is the same as the pool allocator,
+/// except that it passes pool descriptors into functions that do not do
+/// allocations or deallocations. This is needed by the pointer compression
+/// pass, which requires a pool descriptor to be available for a pool if any
+/// load or store to that pool is performed.
+struct PoolAllocatePassAllPools : public PoolAllocate {
+ PoolAllocatePassAllPools() : PoolAllocate(true) {}
+};
+
+}
+
+#endif
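
The header above is primarily consumed by follow-on passes.  Below is a
minimal sketch of such a client, using only names declared in PoolAllocate.h;
the helper function and its output format are invented for illustration.

  #include "PoolAllocate.h"
  #include "llvm/Function.h"
  #include <iostream>
  using namespace llvm;

  // Report how a function was affected by pool allocation.
  static void describePools(PoolAllocate &PA, Function &F) {
    if (PA::FuncInfo *FI = PA.getFuncInfoOrClone(F)) {
      std::cerr << F.getName() << ": " << FI->NodesToPA.size()
                << " local pools, " << FI->ArgNodes.size()
                << " pool arguments\n";
      if (FI->Clone)
        std::cerr << "  transformed body lives in clone "
                  << FI->Clone->getName() << "\n";
    }
  }
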
Index: llvm-poolalloc/lib/PoolAllocate/PoolOptimize.cpp
diff -u /dev/null llvm-poolalloc/lib/PoolAllocate/PoolOptimize.cpp:1.5
--- /dev/null Wed May 18 14:56:44 2005
+++ llvm-poolalloc/lib/PoolAllocate/PoolOptimize.cpp Wed May 18 14:56:28 2005
@@ -0,0 +1,240 @@
+//===-- PoolOptimize.cpp - Optimize pool allocated program ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This simple pass optimizes a program that has been through pool allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+namespace {
+ Statistic<> NumBumpPtr("poolalloc", "Number of bump pointer pools");
+
+ struct PoolOptimize : public ModulePass {
+ bool runOnModule(Module &M);
+ };
+
+ RegisterOpt<PoolOptimize>
+ X("pooloptimize", "Optimize a pool allocated program");
+}
+
+static void getCallsOf(Function *F, std::vector<CallInst*> &Calls) {
+ Calls.clear();
+ for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E; ++UI)
+ Calls.push_back(cast<CallInst>(*UI));
+}
+
+bool PoolOptimize::runOnModule(Module &M) {
+ const Type *VoidPtrTy = PointerType::get(Type::SByteTy);
+ const Type *PoolDescPtrTy = PointerType::get(ArrayType::get(VoidPtrTy, 16));
+
+
+ // Get poolinit function.
+ Function *PoolInit = M.getOrInsertFunction("poolinit", Type::VoidTy,
+ PoolDescPtrTy, Type::UIntTy,
+ Type::UIntTy, 0);
+
+ // Get pooldestroy function.
+ Function *PoolDestroy = M.getOrInsertFunction("pooldestroy", Type::VoidTy,
+ PoolDescPtrTy, 0);
+
+ // The poolalloc function.
+ Function *PoolAlloc = M.getOrInsertFunction("poolalloc",
+ VoidPtrTy, PoolDescPtrTy,
+ Type::UIntTy, 0);
+
+ // The poolrealloc function.
+ Function *PoolRealloc = M.getOrInsertFunction("poolrealloc",
+ VoidPtrTy, PoolDescPtrTy,
+ VoidPtrTy, Type::UIntTy, 0);
+ // The poolmemalign function.
+ Function *PoolMemAlign = M.getOrInsertFunction("poolmemalign",
+ VoidPtrTy, PoolDescPtrTy,
+ Type::UIntTy, Type::UIntTy, 0);
+
+ // Get the poolfree function.
+ Function *PoolFree = M.getOrInsertFunction("poolfree", Type::VoidTy,
+ PoolDescPtrTy, VoidPtrTy, 0);
+
+
+ // Get poolinit_bp function.
+ Function *PoolInitBP = M.getOrInsertFunction("poolinit_bp", Type::VoidTy,
+ PoolDescPtrTy, Type::UIntTy, 0);
+
+ // Get pooldestroy_bp function.
+ Function *PoolDestroyBP = M.getOrInsertFunction("pooldestroy_bp",Type::VoidTy,
+ PoolDescPtrTy, 0);
+
+ // The poolalloc_bp function.
+ Function *PoolAllocBP = M.getOrInsertFunction("poolalloc_bp",
+ VoidPtrTy, PoolDescPtrTy,
+ Type::UIntTy, 0);
+
+ Function *Realloc = M.getOrInsertFunction("realloc",
+ VoidPtrTy, VoidPtrTy, Type::UIntTy,
+ 0);
+ Function *MemAlign = M.getOrInsertFunction("memalign",
+ VoidPtrTy, Type::UIntTy,
+ Type::UIntTy, 0);
+
+
+ // Optimize poolreallocs
+ std::vector<CallInst*> Calls;
+ getCallsOf(PoolRealloc, Calls);
+ for (unsigned i = 0, e = Calls.size(); i != e; ++i) {
+ CallInst *CI = Calls[i];
+ // poolrealloc(PD, null, X) -> poolalloc(PD, X)
+ if (isa<ConstantPointerNull>(CI->getOperand(2))) {
+ std::vector<Value*> Ops;
+ Ops.push_back(CI->getOperand(1));
+ Ops.push_back(CI->getOperand(3));
+ Value *New = new CallInst(PoolAlloc, Ops, CI->getName(), CI);
+ CI->replaceAllUsesWith(New);
+ CI->eraseFromParent();
+ } else if (isa<Constant>(CI->getOperand(3)) &&
+ cast<Constant>(CI->getOperand(3))->isNullValue()) {
+ // poolrealloc(PD, X, 0) -> poolfree(PD, X)
+ std::vector<Value*> Ops;
+ Ops.push_back(CI->getOperand(1));
+ Ops.push_back(CI->getOperand(2));
+ new CallInst(PoolFree, Ops, "", CI);
+ CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+ CI->eraseFromParent();
+ } else if (isa<ConstantPointerNull>(CI->getOperand(1))) {
+ // poolrealloc(null, X, Y) -> realloc(X, Y)
+ std::vector<Value*> Ops;
+ Ops.push_back(CI->getOperand(2));
+ Ops.push_back(CI->getOperand(3));
+ Value *New = new CallInst(Realloc, Ops, CI->getName(), CI);
+ CI->replaceAllUsesWith(New);
+ CI->eraseFromParent();
+ }
+ }
+
+ // Optimize poolallocs
+ getCallsOf(PoolAlloc, Calls);
+ for (unsigned i = 0, e = Calls.size(); i != e; ++i) {
+ CallInst *CI = Calls[i];
+ // poolalloc(null, X) -> malloc(X)
+ if (isa<Constant>(CI->getOperand(1)) &&
+ cast<Constant>(CI->getOperand(1))->isNullValue()) {
+ Value *New = new MallocInst(Type::SByteTy, CI->getOperand(2),
+ CI->getName(), CI);
+ CI->replaceAllUsesWith(New);
+ CI->eraseFromParent();
+ }
+ }
+
+ // Optimize poolmemaligns
+ getCallsOf(PoolMemAlign, Calls);
+ for (unsigned i = 0, e = Calls.size(); i != e; ++i) {
+ CallInst *CI = Calls[i];
+ // poolmemalign(null, X, Y) -> memalign(X, Y)
+ if (isa<ConstantPointerNull>(CI->getOperand(1))) {
+ std::vector<Value*> Ops;
+ Ops.push_back(CI->getOperand(2));
+ Ops.push_back(CI->getOperand(3));
+ Value *New = new CallInst(MemAlign, Ops, CI->getName(), CI);
+ CI->replaceAllUsesWith(New);
+ CI->eraseFromParent();
+ }
+ }
+
+ // Optimize poolfrees
+ getCallsOf(PoolFree, Calls);
+ for (unsigned i = 0, e = Calls.size(); i != e; ++i) {
+ CallInst *CI = Calls[i];
+ // poolfree(PD, null) -> noop
+ if (isa<ConstantPointerNull>(CI->getOperand(2)))
+ CI->eraseFromParent();
+ else if (isa<ConstantPointerNull>(CI->getOperand(1))) {
+ // poolfree(null, Ptr) -> free(Ptr)
+ new FreeInst(CI->getOperand(2), CI);
+ CI->eraseFromParent();
+ }
+ }
+
+ // Transform pools that only have poolinit/destroy/allocate uses into
+ // bump-pointer pools. Also, delete pools that are unused. Find pools by
+ // looking for pool inits in the program.
+ getCallsOf(PoolInit, Calls);
+ std::set<Value*> Pools;
+ for (unsigned i = 0, e = Calls.size(); i != e; ++i)
+ Pools.insert(Calls[i]->getOperand(1));
+
+ // Loop over all of the pools processing each as we find it.
+ for (std::set<Value*>::iterator PI = Pools.begin(), E = Pools.end();
+ PI != E; ++PI) {
+ bool HasPoolAlloc = false, HasOtherUse = false;
+ Value *PoolDesc = *PI;
+ for (Value::use_iterator UI = PoolDesc->use_begin(),
+ E = PoolDesc->use_end(); UI != E; ++UI) {
+ if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
+ if (CI->getCalledFunction() == PoolInit ||
+ CI->getCalledFunction() == PoolDestroy) {
+ // ignore
+ } else if (CI->getCalledFunction() == PoolAlloc) {
+ HasPoolAlloc = true;
+ } else {
+ HasOtherUse = true;
+ break;
+ }
+ } else {
+ HasOtherUse = true;
+ break;
+ }
+ }
+
+ // Can we optimize it?
+ if (!HasOtherUse) {
+      // Yes.  If nothing is ever allocated from the pool, nuke the poolinit,
+      // pooldestroy, and the pool descriptor entirely.
+ if (!HasPoolAlloc) {
+ while (!PoolDesc->use_empty())
+ cast<Instruction>(PoolDesc->use_back())->eraseFromParent();
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(PoolDesc))
+ AI->eraseFromParent();
+ else
+ cast<GlobalVariable>(PoolDesc)->eraseFromParent();
+ } else {
+ // Convert all of the pool descriptor users to the BumpPtr flavor.
+ std::vector<User*> PDUsers(PoolDesc->use_begin(), PoolDesc->use_end());
+
+ while (!PDUsers.empty()) {
+ CallInst *CI = cast<CallInst>(PDUsers.back());
+ PDUsers.pop_back();
+ std::vector<Value*> Args;
+ if (CI->getCalledFunction() == PoolAlloc) {
+ Args.assign(CI->op_begin()+1, CI->op_end());
+ Value *New = new CallInst(PoolAllocBP, Args, CI->getName(), CI);
+ CI->replaceAllUsesWith(New);
+ CI->eraseFromParent();
+ } else if (CI->getCalledFunction() == PoolInit) {
+ Args.assign(CI->op_begin()+1, CI->op_end());
+ Args.erase(Args.begin()+1); // Drop the size argument.
+ Value *New = new CallInst(PoolInitBP, Args, "", CI);
+ CI->eraseFromParent();
+ } else {
+ assert(CI->getCalledFunction() == PoolDestroy);
+ Args.assign(CI->op_begin()+1, CI->op_end());
+ Value *New = new CallInst(PoolDestroyBP, Args, "", CI);
+ CI->eraseFromParent();
+ }
+ }
+ ++NumBumpPtr;
+ }
+ }
+ }
+ return true;
+}
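
To make the bump-pointer rewrite at the end of runOnModule concrete, here is a
hand-written before/after sketch at the source level.  Nothing below comes
from the patch itself; the declarations only mirror the arities requested via
getOrInsertFunction above, and PoolTy is a stand-in for the opaque descriptor.

  // Stand-in declarations (sketch only).
  struct PoolTy { char Opaque[64]; };
  extern "C" void  poolinit(PoolTy*, unsigned ElSize, unsigned Align);
  extern "C" void *poolalloc(PoolTy*, unsigned NumBytes);
  extern "C" void  pooldestroy(PoolTy*);
  extern "C" void  poolinit_bp(PoolTy*, unsigned Align);
  extern "C" void *poolalloc_bp(PoolTy*, unsigned NumBytes);
  extern "C" void  pooldestroy_bp(PoolTy*);

  // Before -pooloptimize: only poolinit/poolalloc/pooldestroy touch PD, and
  // there are no poolfree calls, so the pool qualifies for the fast flavor.
  void before(unsigned N) {
    PoolTy PD;
    poolinit(&PD, 8, 8);
    for (unsigned i = 0; i != N; ++i)
      (void)poolalloc(&PD, 8);
    pooldestroy(&PD);
  }

  // After -pooloptimize: the calls are retargeted to the *_bp entry points,
  // and the element-size argument is dropped from the init call.
  void after(unsigned N) {
    PoolTy PD;
    poolinit_bp(&PD, 8);
    for (unsigned i = 0; i != N; ++i)
      (void)poolalloc_bp(&PD, 8);
    pooldestroy_bp(&PD);
  }
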
Index: llvm-poolalloc/lib/PoolAllocate/TransformFunctionBody.cpp
diff -u /dev/null llvm-poolalloc/lib/PoolAllocate/TransformFunctionBody.cpp:1.46
--- /dev/null Wed May 18 14:56:44 2005
+++ llvm-poolalloc/lib/PoolAllocate/TransformFunctionBody.cpp Wed May 18 14:56:28 2005
@@ -0,0 +1,593 @@
+//===-- TransformFunctionBody.cpp - Pool Function Transformer -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PoolAllocate::TransformBody method.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "PoolAllocator"
+#include "PoolAllocate.h"
+#include "llvm/Analysis/DataStructure/DataStructure.h"
+#include "llvm/Analysis/DataStructure/DSGraph.h"
+#include "llvm/Module.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/VectorExtras.h"
+#include <iostream>
+using namespace llvm;
+using namespace PA;
+
+namespace {
+  /// FuncTransform - This class implements the transformations required to
+  /// pool allocate a function body.
+ struct FuncTransform : public InstVisitor<FuncTransform> {
+ PoolAllocate &PAInfo;
+ DSGraph &G; // The Bottom-up DS Graph
+ FuncInfo &FI;
+
+    // PoolUses - For each pool (identified by the pool descriptor) keep track
+    // of which instructions require the memory in the pool not to be freed.
+    // This does not include poolfree calls.  Note that this is only tracked
+    // for pools whose home is this function, i.e., whose descriptors are
+    // alloca instructions.
+ std::multimap<AllocaInst*, Instruction*> &PoolUses;
+
+    // PoolFrees - For each pool, keep track of the actual poolfree calls
+    // inserted into the code.  This is kept separate from PoolUses.
+ std::multimap<AllocaInst*, CallInst*> &PoolFrees;
+
+ FuncTransform(PoolAllocate &P, DSGraph &g, FuncInfo &fi,
+ std::multimap<AllocaInst*, Instruction*> &poolUses,
+ std::multimap<AllocaInst*, CallInst*> &poolFrees)
+ : PAInfo(P), G(g), FI(fi),
+ PoolUses(poolUses), PoolFrees(poolFrees) {
+ }
+
+ template <typename InstType, typename SetType>
+ void AddPoolUse(InstType &I, Value *PoolHandle, SetType &Set) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(PoolHandle))
+ Set.insert(std::make_pair(AI, &I));
+ }
+
+ void visitInstruction(Instruction &I);
+ void visitMallocInst(MallocInst &MI);
+ void visitCallocCall(CallSite CS);
+ void visitReallocCall(CallSite CS);
+ void visitMemAlignCall(CallSite CS);
+ void visitFreeInst(FreeInst &FI);
+ void visitCallSite(CallSite CS);
+ void visitCallInst(CallInst &CI) { visitCallSite(&CI); }
+ void visitInvokeInst(InvokeInst &II) { visitCallSite(&II); }
+ void visitLoadInst(LoadInst &I);
+ void visitStoreInst (StoreInst &I);
+
+ private:
+ Instruction *TransformAllocationInstr(Instruction *I, Value *Size);
+ Instruction *InsertPoolFreeInstr(Value *V, Instruction *Where);
+
+ void UpdateNewToOldValueMap(Value *OldVal, Value *NewV1, Value *NewV2 = 0) {
+ std::map<Value*, const Value*>::iterator I =
+ FI.NewToOldValueMap.find(OldVal);
+ assert(I != FI.NewToOldValueMap.end() && "OldVal not found in clone?");
+ FI.NewToOldValueMap.insert(std::make_pair(NewV1, I->second));
+ if (NewV2)
+ FI.NewToOldValueMap.insert(std::make_pair(NewV2, I->second));
+ FI.NewToOldValueMap.erase(I);
+ }
+
+ Value* getOldValueIfAvailable(Value* V) {
+ if (!FI.NewToOldValueMap.empty()) {
+ // If the NewToOldValueMap is in effect, use it.
+ std::map<Value*,const Value*>::iterator I = FI.NewToOldValueMap.find(V);
+ if (I != FI.NewToOldValueMap.end())
+ V = (Value*)I->second;
+ }
+ return V;
+ }
+
+ DSNodeHandle& getDSNodeHFor(Value *V) {
+ return G.getScalarMap()[getOldValueIfAvailable(V)];
+ }
+
+ Value *getPoolHandle(Value *V) {
+ DSNode *Node = getDSNodeHFor(V).getNode();
+ // Get the pool handle for this DSNode...
+ std::map<const DSNode*, Value*>::iterator I =
+ FI.PoolDescriptors.find(Node);
+ return I != FI.PoolDescriptors.end() ? I->second : 0;
+ }
+
+ Function* retCloneIfFunc(Value *V);
+ };
+}
+
+void PoolAllocate::TransformBody(DSGraph &g, PA::FuncInfo &fi,
+ std::multimap<AllocaInst*,Instruction*> &poolUses,
+ std::multimap<AllocaInst*, CallInst*> &poolFrees,
+ Function &F) {
+ FuncTransform(*this, g, fi, poolUses, poolFrees).visit(F);
+}
+
+
+// retCloneIfFunc - If V is a direct reference to a function that has pool
+// allocation information (i.e., has been pool allocated), return its clone;
+// otherwise return null.
+Function* FuncTransform::retCloneIfFunc(Value *V) {
+ if (Function *F = dyn_cast<Function>(V))
+ if (FuncInfo *FI = PAInfo.getFuncInfo(*F))
+ return FI->Clone;
+
+ return 0;
+}
+
+void FuncTransform::visitLoadInst(LoadInst &LI) {
+ if (Value *PH = getPoolHandle(LI.getOperand(0)))
+ AddPoolUse(LI, PH, PoolUses);
+ visitInstruction(LI);
+}
+
+void FuncTransform::visitStoreInst(StoreInst &SI) {
+ if (Value *PH = getPoolHandle(SI.getOperand(1)))
+ AddPoolUse(SI, PH, PoolUses);
+ visitInstruction(SI);
+}
+
+Instruction *FuncTransform::TransformAllocationInstr(Instruction *I,
+ Value *Size) {
+ std::string Name = I->getName(); I->setName("");
+
+ if (Size->getType() != Type::UIntTy)
+ Size = new CastInst(Size, Type::UIntTy, Size->getName(), I);
+
+ // Insert a call to poolalloc
+ Value *PH = getPoolHandle(I);
+ Instruction *V = new CallInst(PAInfo.PoolAlloc, make_vector(PH, Size, 0),
+ Name, I);
+
+ AddPoolUse(*V, PH, PoolUses);
+
+ // Cast to the appropriate type if necessary
+ Instruction *Casted = V;
+ if (V->getType() != I->getType())
+ Casted = new CastInst(V, I->getType(), V->getName(), I);
+
+ // Update def-use info
+ I->replaceAllUsesWith(Casted);
+
+ // If we are modifying the original function, update the DSGraph.
+ if (!FI.Clone) {
+ // V and Casted now point to whatever the original allocation did.
+ G.getScalarMap().replaceScalar(I, V);
+ if (V != Casted)
+ G.getScalarMap()[Casted] = G.getScalarMap()[V];
+ } else { // Otherwise, update the NewToOldValueMap
+ UpdateNewToOldValueMap(I, V, V != Casted ? Casted : 0);
+ }
+
+ // If this was an invoke, fix up the CFG.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
+ new BranchInst(II->getNormalDest(), I);
+ II->getUnwindDest()->removePredecessor(II->getParent(), true);
+ }
+
+ // Remove old allocation instruction.
+ I->eraseFromParent();
+ return Casted;
+}
+
+
+void FuncTransform::visitMallocInst(MallocInst &MI) {
+ // Get the pool handle for the node that this contributes to...
+ Value *PH = getPoolHandle(&MI);
+ if (PH == 0 || isa<ConstantPointerNull>(PH)) return;
+
+ TargetData &TD = PAInfo.getAnalysis<TargetData>();
+ Value *AllocSize =
+ ConstantUInt::get(Type::UIntTy, TD.getTypeSize(MI.getAllocatedType()));
+
+ if (MI.isArrayAllocation())
+ AllocSize = BinaryOperator::create(Instruction::Mul, AllocSize,
+ MI.getOperand(0), "sizetmp", &MI);
+
+ TransformAllocationInstr(&MI, AllocSize);
+}
+
+
+Instruction *FuncTransform::InsertPoolFreeInstr(Value *Arg, Instruction *Where){
+ Value *PH = getPoolHandle(Arg); // Get the pool handle for this DSNode...
+ if (PH == 0 || isa<ConstantPointerNull>(PH)) return 0;
+
+ // Insert a cast and a call to poolfree...
+ Value *Casted = Arg;
+ if (Arg->getType() != PointerType::get(Type::SByteTy)) {
+ Casted = new CastInst(Arg, PointerType::get(Type::SByteTy),
+ Arg->getName()+".casted", Where);
+ G.getScalarMap()[Casted] = G.getScalarMap()[Arg];
+ }
+
+ CallInst *FreeI = new CallInst(PAInfo.PoolFree, make_vector(PH, Casted, 0),
+ "", Where);
+ AddPoolUse(*FreeI, PH, PoolFrees);
+ return FreeI;
+}
+
+
+void FuncTransform::visitFreeInst(FreeInst &FrI) {
+ if (Instruction *I = InsertPoolFreeInstr(FrI.getOperand(0), &FrI)) {
+ // Delete the now obsolete free instruction...
+ FrI.getParent()->getInstList().erase(&FrI);
+
+ // Update the NewToOldValueMap if this is a clone
+ if (!FI.NewToOldValueMap.empty()) {
+ std::map<Value*,const Value*>::iterator II =
+ FI.NewToOldValueMap.find(&FrI);
+ assert(II != FI.NewToOldValueMap.end() &&
+ "FrI not found in clone?");
+ FI.NewToOldValueMap.insert(std::make_pair(I, II->second));
+ FI.NewToOldValueMap.erase(II);
+ }
+ }
+}
+
+
+void FuncTransform::visitCallocCall(CallSite CS) {
+ Module *M = CS.getInstruction()->getParent()->getParent()->getParent();
+ assert(CS.arg_end()-CS.arg_begin() == 2 && "calloc takes two arguments!");
+ Value *V1 = CS.getArgument(0);
+ Value *V2 = CS.getArgument(1);
+ if (V1->getType() != V2->getType()) {
+ V1 = new CastInst(V1, Type::UIntTy, V1->getName(), CS.getInstruction());
+ V2 = new CastInst(V2, Type::UIntTy, V2->getName(), CS.getInstruction());
+ }
+
+ V2 = BinaryOperator::create(Instruction::Mul, V1, V2, "size",
+ CS.getInstruction());
+  if (V2->getType() != Type::UIntTy)
+ V2 = new CastInst(V2, Type::UIntTy, V2->getName(), CS.getInstruction());
+
+ BasicBlock::iterator BBI =
+ TransformAllocationInstr(CS.getInstruction(), V2);
+ Value *Ptr = BBI++;
+
+ // We just turned the call of 'calloc' into the equivalent of malloc. To
+ // finish calloc, we need to zero out the memory.
+ Function *MemSet = M->getOrInsertFunction("llvm.memset",
+ Type::VoidTy,
+ PointerType::get(Type::SByteTy),
+ Type::UByteTy, Type::UIntTy,
+ Type::UIntTy, 0);
+
+ if (Ptr->getType() != PointerType::get(Type::SByteTy))
+ Ptr = new CastInst(Ptr, PointerType::get(Type::SByteTy), Ptr->getName(),
+ BBI);
+
+ // We know that the memory returned by poolalloc is at least 4 byte aligned.
+ new CallInst(MemSet, make_vector(Ptr, ConstantUInt::get(Type::UByteTy, 0),
+ V2, ConstantUInt::get(Type::UIntTy, 4), 0),
+ "", BBI);
+}
+
+
+void FuncTransform::visitReallocCall(CallSite CS) {
+ assert(CS.arg_end()-CS.arg_begin() == 2 && "realloc takes two arguments!");
+ Instruction *I = CS.getInstruction();
+ Value *PH = getPoolHandle(I);
+ Value *OldPtr = CS.getArgument(0);
+ Value *Size = CS.getArgument(1);
+
+ if (Size->getType() != Type::UIntTy)
+ Size = new CastInst(Size, Type::UIntTy, Size->getName(), I);
+
+ static Type *VoidPtrTy = PointerType::get(Type::SByteTy);
+ if (OldPtr->getType() != VoidPtrTy)
+ OldPtr = new CastInst(OldPtr, VoidPtrTy, OldPtr->getName(), I);
+
+ std::string Name = I->getName(); I->setName("");
+ Instruction *V = new CallInst(PAInfo.PoolRealloc, make_vector(PH, OldPtr,
+ Size, 0),
+ Name, I);
+ Instruction *Casted = V;
+ if (V->getType() != I->getType())
+ Casted = new CastInst(V, I->getType(), V->getName(), I);
+
+ // Update def-use info
+ I->replaceAllUsesWith(Casted);
+
+ // If we are modifying the original function, update the DSGraph.
+ if (!FI.Clone) {
+ // V and Casted now point to whatever the original allocation did.
+ G.getScalarMap().replaceScalar(I, V);
+ if (V != Casted)
+ G.getScalarMap()[Casted] = G.getScalarMap()[V];
+ } else { // Otherwise, update the NewToOldValueMap
+ UpdateNewToOldValueMap(I, V, V != Casted ? Casted : 0);
+ }
+
+ // If this was an invoke, fix up the CFG.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
+ new BranchInst(II->getNormalDest(), I);
+ II->getUnwindDest()->removePredecessor(II->getParent(), true);
+ }
+
+ // Remove old allocation instruction.
+ I->eraseFromParent();
+}
+
+
+/// visitMemAlignCall - Handle memalign and posix_memalign.
+///
+void FuncTransform::visitMemAlignCall(CallSite CS) {
+ Instruction *I = CS.getInstruction();
+ Value *ResultDest = 0;
+ Value *Align = 0;
+ Value *Size = 0;
+ Value *PH;
+
+ if (CS.getCalledFunction()->getName() == "memalign") {
+ Align = CS.getArgument(0);
+ Size = CS.getArgument(1);
+ PH = getPoolHandle(I);
+ } else {
+ assert(CS.getCalledFunction()->getName() == "posix_memalign");
+ ResultDest = CS.getArgument(0);
+ Align = CS.getArgument(1);
+ Size = CS.getArgument(2);
+
+ assert(0 && "posix_memalign not implemented fully!");
+ // We need to get the pool descriptor corresponding to *ResultDest.
+ PH = getPoolHandle(I);
+
+ // Return success always.
+ Value *RetVal = Constant::getNullValue(I->getType());
+ I->replaceAllUsesWith(RetVal);
+
+ static const Type *PtrPtr=PointerType::get(PointerType::get(Type::SByteTy));
+ if (ResultDest->getType() != PtrPtr)
+ ResultDest = new CastInst(ResultDest, PtrPtr, ResultDest->getName(), I);
+ }
+
+ if (Align->getType() != Type::UIntTy)
+ Align = new CastInst(Align, Type::UIntTy, Align->getName(), I);
+ if (Size->getType() != Type::UIntTy)
+ Size = new CastInst(Size, Type::UIntTy, Size->getName(), I);
+
+ std::string Name = I->getName(); I->setName("");
+ Instruction *V = new CallInst(PAInfo.PoolMemAlign,
+ make_vector(PH, Align, Size, 0), Name, I);
+
+ Instruction *Casted = V;
+ if (V->getType() != I->getType())
+ Casted = new CastInst(V, I->getType(), V->getName(), I);
+
+ if (ResultDest)
+ new StoreInst(V, ResultDest, I);
+ else
+ I->replaceAllUsesWith(Casted);
+
+ // If we are modifying the original function, update the DSGraph.
+ if (!FI.Clone) {
+ // V and Casted now point to whatever the original allocation did.
+ G.getScalarMap().replaceScalar(I, V);
+ if (V != Casted)
+ G.getScalarMap()[Casted] = G.getScalarMap()[V];
+ } else { // Otherwise, update the NewToOldValueMap
+ UpdateNewToOldValueMap(I, V, V != Casted ? Casted : 0);
+ }
+
+ // If this was an invoke, fix up the CFG.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
+ new BranchInst(II->getNormalDest(), I);
+ II->getUnwindDest()->removePredecessor(II->getParent(), true);
+ }
+
+ // Remove old allocation instruction.
+ I->eraseFromParent();
+}
+
+
+
+void FuncTransform::visitCallSite(CallSite CS) {
+ Function *CF = CS.getCalledFunction();
+ Instruction *TheCall = CS.getInstruction();
+
+ // If this function is one of the memory manipulating functions built into
+ // libc, emulate it with pool calls as appropriate.
+ if (CF && CF->isExternal())
+ if (CF->getName() == "calloc") {
+ visitCallocCall(CS);
+ return;
+ } else if (CF->getName() == "realloc") {
+ visitReallocCall(CS);
+ return;
+ } else if (CF->getName() == "memalign" ||
+ CF->getName() == "posix_memalign") {
+ visitMemAlignCall(CS);
+ return;
+ } else if (CF->getName() == "strdup") {
+ assert(0 && "strdup should have been linked into the program!");
+ } else if (CF->getName() == "valloc") {
+ std::cerr << "VALLOC USED BUT NOT HANDLED!\n";
+ abort();
+ }
+
+ // We need to figure out which local pool descriptors correspond to the pool
+ // descriptor arguments passed into the function call. Calculate a mapping
+  // from callee DSNodes to caller DSNodes.  We construct a partial isomorphism
+  // between the graphs to figure out which pool descriptors need to be passed
+  // in.  The roots of this mapping are found from arguments and return values.
+ //
+ EquivClassGraphs& ECGraphs = PAInfo.getECGraphs();
+ DSGraph::NodeMapTy NodeMapping;
+ Instruction *NewCall;
+ Value *NewCallee;
+ std::vector<const DSNode*> ArgNodes;
+ DSGraph *CalleeGraph; // The callee graph
+
+ // For indirect callees find any callee since all DS graphs have been merged.
+ if (CF) { // Direct calls are nice and simple.
+ DEBUG(std::cerr << " Handling direct call: " << *TheCall);
+ FuncInfo *CFI = PAInfo.getFuncInfo(*CF);
+ if (CFI == 0 || CFI->Clone == 0) { // Nothing to transform...
+ visitInstruction(*TheCall);
+ return;
+ }
+ NewCallee = CFI->Clone;
+ ArgNodes = CFI->ArgNodes;
+
+ CalleeGraph = &ECGraphs.getDSGraph(*CF);
+ } else {
+ DEBUG(std::cerr << " Handling indirect call: " << *TheCall);
+
+ // Here we fill in CF with one of the possible called functions. Because we
+ // merged together all of the arguments to all of the functions in the
+ // equivalence set, it doesn't really matter which one we pick.
+ // (If the function was cloned, we have to map the cloned call instruction
+ // in CS back to the original call instruction.)
+ Instruction *OrigInst =
+ cast<Instruction>(getOldValueIfAvailable(CS.getInstruction()));
+ CF = isa<CallInst>(OrigInst)?
+ ECGraphs.getSomeCalleeForCallSite(cast<CallInst>(OrigInst)) :
+ ECGraphs.getSomeCalleeForCallSite(cast<InvokeInst>(OrigInst));
+
+ if (!CF) {
+ // FIXME: Unknown callees for a call-site. Warn and ignore.
+ std::cerr << "\n***\n*** WARNING (FuncTransform::visitCallSite): "
+ << "Unknown callees for call-site in function "
+ << CS.getCaller()->getName() << "\n***\n";
+ return;
+ }
+
+ // Get the common graph for the set of functions this call may invoke.
+ CalleeGraph = &ECGraphs.getDSGraph(*CF);
+
+#ifndef NDEBUG
+ // Verify that all potential callees at call site have the same DS graph.
+ EquivClassGraphs::callee_iterator I =
+ ECGraphs.callee_begin(OrigInst), E = ECGraphs.callee_end(OrigInst);
+ for (; I != E; ++I)
+ if (!I->second->isExternal())
+ assert(CalleeGraph == &ECGraphs.getDSGraph(*I->second) &&
+ "Callees at call site do not have a common graph!");
+#endif
+
+ // Find the DS nodes for the arguments that need to be added, if any.
+ FuncInfo *CFI = PAInfo.getFuncInfo(*CF);
+ assert(CFI && "No function info for callee at indirect call?");
+ ArgNodes = CFI->ArgNodes;
+
+ if (ArgNodes.empty())
+ return; // No arguments to add? Transformation is a noop!
+
+ // Cast the function pointer to an appropriate type!
+ std::vector<const Type*> ArgTys(ArgNodes.size(),
+ PoolAllocate::PoolDescPtrTy);
+ for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ I != E; ++I)
+ ArgTys.push_back((*I)->getType());
+
+ FunctionType *FTy = FunctionType::get(TheCall->getType(), ArgTys, false);
+ PointerType *PFTy = PointerType::get(FTy);
+
+ // If there are any pool arguments cast the func ptr to the right type.
+ NewCallee = new CastInst(CS.getCalledValue(), PFTy, "tmp", TheCall);
+ }
+
+ Function::arg_iterator FAI = CF->arg_begin(), E = CF->arg_end();
+ CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ for ( ; FAI != E && AI != AE; ++FAI, ++AI)
+ if (!isa<Constant>(*AI))
+ DSGraph::computeNodeMapping(CalleeGraph->getNodeForValue(FAI),
+ getDSNodeHFor(*AI), NodeMapping, false);
+
+ //assert(AI == AE && "Varargs calls not handled yet!");
+
+ // Map the return value as well...
+ if (DS::isPointerType(TheCall->getType()))
+ DSGraph::computeNodeMapping(CalleeGraph->getReturnNodeFor(*CF),
+ getDSNodeHFor(TheCall), NodeMapping, false);
+
+ // Map the nodes that are pointed to by globals.
+ DSScalarMap &CalleeSM = CalleeGraph->getScalarMap();
+ for (DSScalarMap::global_iterator GI = G.getScalarMap().global_begin(),
+ E = G.getScalarMap().global_end(); GI != E; ++GI)
+ if (CalleeSM.count(*GI))
+ DSGraph::computeNodeMapping(CalleeGraph->getNodeForValue(*GI),
+ getDSNodeHFor(*GI),
+ NodeMapping, false);
+
+ // Okay, now that we have established our mapping, we can figure out which
+ // pool descriptors to pass in...
+ std::vector<Value*> Args;
+ for (unsigned i = 0, e = ArgNodes.size(); i != e; ++i) {
+ Value *ArgVal = Constant::getNullValue(PoolAllocate::PoolDescPtrTy);
+ if (NodeMapping.count(ArgNodes[i]))
+ if (DSNode *LocalNode = NodeMapping[ArgNodes[i]].getNode())
+ if (FI.PoolDescriptors.count(LocalNode))
+ ArgVal = FI.PoolDescriptors.find(LocalNode)->second;
+#if 0
+ if (isa<Constant>(ArgVal) && cast<Constant>(ArgVal)->isNullValue())
+ std::cerr << "WARNING: NULL POOL ARGUMENTS ARE PASSED IN!\n";
+#endif
+ Args.push_back(ArgVal);
+ }
+
+ // Add the rest of the arguments...
+ Args.insert(Args.end(), CS.arg_begin(), CS.arg_end());
+
+ std::string Name = TheCall->getName(); TheCall->setName("");
+
+ if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+ NewCall = new InvokeInst(NewCallee, II->getNormalDest(),
+ II->getUnwindDest(), Args, Name, TheCall);
+ } else {
+ NewCall = new CallInst(NewCallee, Args, Name, TheCall);
+ }
+
+ // Add all of the uses of the pool descriptor
+ for (unsigned i = 0, e = ArgNodes.size(); i != e; ++i)
+ AddPoolUse(*NewCall, Args[i], PoolUses);
+
+ TheCall->replaceAllUsesWith(NewCall);
+ DEBUG(std::cerr << " Result Call: " << *NewCall);
+
+ if (TheCall->getType() != Type::VoidTy) {
+ // If we are modifying the original function, update the DSGraph...
+ DSGraph::ScalarMapTy &SM = G.getScalarMap();
+ DSGraph::ScalarMapTy::iterator CII = SM.find(TheCall);
+ if (CII != SM.end()) {
+ SM[NewCall] = CII->second;
+ SM.erase(CII); // Destroy the CallInst
+ } else if (!FI.NewToOldValueMap.empty()) {
+ // Otherwise, if this is a clone, update the NewToOldValueMap with the new
+ // CI return value.
+ UpdateNewToOldValueMap(TheCall, NewCall);
+ }
+ } else if (!FI.NewToOldValueMap.empty()) {
+ UpdateNewToOldValueMap(TheCall, NewCall);
+ }
+
+ TheCall->eraseFromParent();
+ visitInstruction(*NewCall);
+}
+
+
+// visitInstruction - For all instructions in the transformed function bodies,
+// replace any references to the original functions with references to their
+// transformed clones.  Many instructions can "take the address of" a function,
+// and we must make sure to catch each of these uses, and transform it into a
+// reference to the new, transformed, function.
+void FuncTransform::visitInstruction(Instruction &I) {
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+ if (Function *clonedFunc = retCloneIfFunc(I.getOperand(i))) {
+ Constant *CF = clonedFunc;
+ I.setOperand(i, ConstantExpr::getCast(CF, I.getOperand(i)->getType()));
+ }
+}
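
Finally, as a concrete picture of what visitCallSite produces for a call that
needs pool arguments, here is a hand-written source-level sketch.  The names
makeNode_clone and caller are invented; the only thing taken from the pass
above is the idea that pool descriptors for the mapped DSNodes are prepended
to the original argument list of the cloned callee.

  struct PoolTy { char Opaque[64]; };
  struct Node { Node *Next; };
  extern "C" void *poolalloc(PoolTy*, unsigned);

  // Original callee: allocates from the heap, so its clone takes a pool.
  Node *makeNode_clone(PoolTy *PD) {
    Node *N = (Node*)poolalloc(PD, sizeof(Node));   // was: malloc(sizeof(Node))
    N->Next = 0;
    return N;
  }

  void caller(PoolTy *NodePool) {
    // Before the transform the body contained:   Node *N = makeNode();
    // After the transform, the call site passes the caller's descriptor for
    // the DSNode the callee allocates from, followed by the original args:
    Node *N = makeNode_clone(NodePool);
    N->Next = 0;
  }
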