[llvm-commits] CVS: llvm/lib/Transforms/IPO/GlobalOpt.cpp

Chris Lattner sabre at nondot.org
Sat Sep 30 16:32:23 PDT 2006



Changes in directory llvm/lib/Transforms/IPO:

GlobalOpt.cpp updated: 1.66 -> 1.67
---
Log message:

Implement SRA of heap allocations.


---
Diffs of the changes:  (+266 -10)

 GlobalOpt.cpp |  276 +++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 files changed, 266 insertions(+), 10 deletions(-)


Index: llvm/lib/Transforms/IPO/GlobalOpt.cpp
diff -u llvm/lib/Transforms/IPO/GlobalOpt.cpp:1.66 llvm/lib/Transforms/IPO/GlobalOpt.cpp:1.67
--- llvm/lib/Transforms/IPO/GlobalOpt.cpp:1.66	Sat Sep 30 14:40:30 2006
+++ llvm/lib/Transforms/IPO/GlobalOpt.cpp	Sat Sep 30 18:32:09 2006
@@ -36,6 +36,7 @@
   Statistic<> NumMarked   ("globalopt", "Number of globals marked constant");
   Statistic<> NumSRA      ("globalopt", "Number of aggregate globals broken "
                            "into scalars");
+  Statistic<> NumHeapSRA  ("globalopt", "Number of heap objects SRA'd");
   Statistic<> NumSubstitute("globalopt",
                         "Number of globals with initializers stored into them");
   Statistic<> NumDeleted  ("globalopt", "Number of globals deleted");
@@ -794,9 +795,235 @@
       return false;
     }
   return true;
+}
 
+/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV
+/// somewhere.  Rewrite every other user of Alloc to use a fresh load of GV
+/// instead, and delete the store of Alloc into GV, so that Alloc ends up with
+/// no uses at all.  This assumes that the uses of Alloc pass the
+/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
+static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, 
+                                          GlobalVariable *GV) {
+  while (!Alloc->use_empty()) {
+    Instruction *U = Alloc->use_back();
+    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+      // A store whose pointer operand is GV is the store of the allocation
+      // into the global: remove it.  Other stores are handled like any user.
+      if (SI->getOperand(1) == GV) {
+        SI->eraseFromParent();
+        continue;
+      }
+    }
+    
+    // Give U its own load of the global (U is passed as the insertion point)
+    // and make U use that load instead of the malloc.
+    Value *NL = new LoadInst(GV, GV->getName()+".val", U);
+    U->replaceUsesOfWith(Alloc, NL);
+  }
 }
 
+/// GlobalLoadUsesSimpleEnoughForHeapSRA - Return true if every user of every
+/// load of GV is a setcc whose operand 1 is null or a GEP with >= 3 operands.
+static bool GlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV) {
+  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E; 
+       ++UI)
+    if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+      // Only loads of GV are inspected; other users of GV are skipped.  Each
+      // user of a load must be a setcc against null or a simple getelementptr.
+      for (Value::use_iterator UI = LI->use_begin(), E = LI->use_end(); UI != E; 
+           ++UI) {  // NB: this UI/E pair shadows the outer loop's.
+        // A setcc is ok only when its operand 1 is the null pointer constant.
+        if (SetCondInst *SCI = dyn_cast<SetCondInst>(*UI)) {
+          if (!isa<ConstantPointerNull>(SCI->getOperand(1)))
+            return false;
+          continue;
+        }
+        
+        // getelementptr is also ok, but only a simple form.
+        GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI);
+        if (!GEPI) return false;
+        
+        // Must index into the array and into the struct (>= 3 operands).
+        if (GEPI->getNumOperands() < 3)
+          return false;
+        
+        // Otherwise the GEP is ok.
+        continue;
+      }
+    }
+  return true;
+}
+
+/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global.  Ptr
+/// is a value loaded from the global.  Eliminate all uses of Ptr, making them
+/// use loads of the matching FieldGlobals entry instead.  Every user of Ptr
+/// must satisfy GlobalLoadUsesSimpleEnoughForHeapSRA; Ptr is left without uses.
+static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Ptr, 
+                             const std::vector<GlobalVariable*> &FieldGlobals) {
+  std::vector<Value *> InsertedLoadsForPtr;
+  // InsertedLoadsForPtr[N] is the load of FieldGlobals[N] (0 if none yet).
+  while (!Ptr->use_empty()) {
+    Instruction *User = Ptr->use_back();
+    
+    // If this is a comparison against null, handle it.
+    if (SetCondInst *SCI = dyn_cast<SetCondInst>(User)) {
+      assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
+      // Any field pointer can stand in for Ptr in a null test.  Reuse the last
+      // cached field load, or create (and cache in slot 0) a load of field 0.
+      Value *NPtr;
+      if (InsertedLoadsForPtr.empty()) {
+        NPtr = new LoadInst(FieldGlobals[0], Ptr->getName()+".f0", Ptr);
+        InsertedLoadsForPtr.push_back(NPtr);
+      } else {
+        NPtr = InsertedLoadsForPtr.back();
+      }
+      
+      Value *New = new SetCondInst(SCI->getOpcode(), NPtr,
+                                   Constant::getNullValue(NPtr->getType()),
+                                   SCI->getName(), SCI);
+      SCI->replaceAllUsesWith(New);
+      SCI->eraseFromParent();
+      continue;
+    }
+    
+    // Otherwise, this should be: 'getelementptr Ptr, Idx, uint FieldNo ...'
+    GetElementPtrInst *GEPI = cast<GetElementPtrInst>(User);
+    assert(GEPI->getNumOperands() >= 3 && isa<ConstantUInt>(GEPI->getOperand(2))
+           && "Unexpected GEPI!");
+    
+    // Load the pointer for this field, reusing an earlier load if we have one.
+    unsigned FieldNo = cast<ConstantUInt>(GEPI->getOperand(2))->getValue();
+    if (InsertedLoadsForPtr.size() <= FieldNo)
+      InsertedLoadsForPtr.resize(FieldNo+1);
+    if (InsertedLoadsForPtr[FieldNo] == 0)
+      InsertedLoadsForPtr[FieldNo] = new LoadInst(FieldGlobals[FieldNo],
+                                                  Ptr->getName()+".f" + 
+                                                  utostr(FieldNo), Ptr);
+    Value *NewPtr = InsertedLoadsForPtr[FieldNo];
+
+    // Build the new index list: keep the array index, drop the field number.
+    std::vector<Value*> GEPIdx;
+    GEPIdx.push_back(GEPI->getOperand(1));
+    GEPIdx.insert(GEPIdx.end(), GEPI->op_begin()+3, GEPI->op_end());
+
+    Value *NGEPI = new GetElementPtrInst(NewPtr, GEPIdx, GEPI->getName(), GEPI);
+    GEPI->replaceAllUsesWith(NGEPI);
+    GEPI->eraseFromParent();
+  }
+}
+
+/// PerformHeapAllocSRoA - MI is an allocation of an array of structures.  Break
+/// it up into one malloc and one global per field; GV and MI are erased.
+static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){
+  /*DEBUG*/(std::cerr << "SROA HEAP ALLOC: " << *GV << "  MALLOC = " << *MI);
+  const StructType *STy = cast<StructType>(MI->getAllocatedType());
+  // NOTE(review): the std::cerr trace above always prints; debug-only intended?
+  // There is guaranteed to be at least one use of the malloc (storing
+  // it into GV).  Turn any other uses into uses of loads of GV to simplify
+  // later code; this also deletes the store into GV.  NOTE(review): confirm
+  // these rewritten uses are simple enough for the load rewriting below.
+  ReplaceUsesOfMallocWithGlobal(MI, GV);
+  
+  // Okay, at this point, there are no users of the malloc.  For each field,
+  // make a null-initialized global and a malloc (at MI) stored into it.
+  std::vector<GlobalVariable*> FieldGlobals;
+  std::vector<MallocInst*> FieldMallocs;
+  
+  for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
+    const Type *FieldTy = STy->getElementType(FieldNo);
+    const Type *PFieldTy = PointerType::get(FieldTy);
+    
+    GlobalVariable *NGV =
+      new GlobalVariable(PFieldTy, false, GlobalValue::InternalLinkage,
+                         Constant::getNullValue(PFieldTy),
+                         GV->getName() + ".f" + utostr(FieldNo), GV);
+    FieldGlobals.push_back(NGV);
+    
+    MallocInst *NMI = new MallocInst(FieldTy, MI->getArraySize(),
+                                     MI->getName() + ".f" + utostr(FieldNo),MI);
+    FieldMallocs.push_back(NMI);
+    new StoreInst(NMI, NGV, MI);
+  }
+  
+  // The tricky aspect of this transformation is handling the case when malloc
+  // fails.  In the original code, malloc failing would set the result pointer
+  // of malloc to null.  In this case, some mallocs could succeed and others
+  // could fail.  As such, we emit code that looks like this:
+  //    F0 = malloc(field0)
+  //    F1 = malloc(field1)
+  //    F2 = malloc(field2)
+  //    if (F0 == 0 || F1 == 0 || F2 == 0) {
+  //      if (F0) { free(F0); F0 = 0; }
+  //      if (F1) { free(F1); F1 = 0; }
+  //      if (F2) { free(F2); F2 = 0; }
+  //    }
+  Value *RunningOr = 0;
+  for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
+    Value *Cond = new SetCondInst(Instruction::SetEQ, FieldMallocs[i],
+                             Constant::getNullValue(FieldMallocs[i]->getType()),
+                                  "isnull", MI);
+    if (!RunningOr)
+      RunningOr = Cond;   // First seteq
+    else
+      RunningOr = BinaryOperator::createOr(RunningOr, Cond, "tmp", MI);
+  }
+
+  // Split the basic block at the old malloc.
+  BasicBlock *OrigBB = MI->getParent();
+  BasicBlock *ContBB = OrigBB->splitBasicBlock(MI, "malloc_cont");
+  
+  // Create the block to check the first condition.  Put all these blocks at the
+  // end of the function as they are unlikely to be executed.
+  BasicBlock *NullPtrBlock = new BasicBlock("malloc_ret_null",
+                                            OrigBB->getParent());
+  
+  // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
+  // branch on RunningOr.
+  OrigBB->getTerminator()->eraseFromParent();
+  new BranchInst(NullPtrBlock, ContBB, RunningOr, OrigBB);
+  
+  // Within the NullPtrBlock, we need to emit a comparison and branch for each
+  // pointer, because some may be null while others are not.
+  for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+    Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
+    Value *Cmp = new SetCondInst(Instruction::SetNE, GVVal, 
+                                 Constant::getNullValue(GVVal->getType()),
+                                 "tmp", NullPtrBlock);
+    BasicBlock *FreeBlock = new BasicBlock("free_it", OrigBB->getParent());
+    BasicBlock *NextBlock = new BasicBlock("next", OrigBB->getParent());
+    new BranchInst(FreeBlock, NextBlock, Cmp, NullPtrBlock);
+
+    // Fill in FreeBlock: free the pointer, null the global, go to NextBlock.
+    new FreeInst(GVVal, FreeBlock);
+    new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
+                  FreeBlock);
+    new BranchInst(NextBlock, FreeBlock);
+    // The check for the following field is emitted into NextBlock.
+    NullPtrBlock = NextBlock;
+  }
+  // The final 'next' block rejoins the normal path at ContBB.
+  new BranchInst(ContBB, NullPtrBlock);
+  
+  
+  // MI is no longer needed, remove it.
+  MI->eraseFromParent();
+
+  
+  // Okay, the malloc site is completely handled.  All of the uses of GV are now
+  // loads, and all uses of those loads are simple.  Rewrite them to use loads
+  // of the per-field globals instead.
+  while (!GV->use_empty()) {
+    LoadInst *LI = cast<LoadInst>(GV->use_back());
+    RewriteUsesOfLoadForHeapSRoA(LI, FieldGlobals);
+    LI->eraseFromParent();
+  }
+
+  // The old global is now dead, remove it.
+  GV->eraseFromParent();
+
+  ++NumHeapSRA;
+  return FieldGlobals[0];  // The global created for field 0.
+}
+
+
 // OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
 // that only one value (besides its initializer) is ever stored to the global.
 static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
@@ -835,23 +1062,52 @@
       if (!MI->getAllocatedType()->isSized())
         return false;
       
+      // We can't optimize this global unless all uses of it are *known* to be
+      // of the malloc value, not of the null initializer value (consider a use
+      // that compares the global's value against zero to see if the malloc has
+      // been reached).  To do this, we check to see if all uses of the global
+      // would trap if the global were null: this proves that they must all
+      // happen after the malloc.
+      if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
+        return false;
+
+      // We can't optimize this if the malloc itself is used in a complex way,
+      // for example, being stored into multiple globals.  This allows the
+      // malloc to be stored into the specified global, loaded, setcc'd, and
+      // GEP'd.  These are all things we could transform to using the global
+      // for.
+      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(MI, GV))
+        return false;
+
+      
       // If we have a global that is only initialized with a fixed size malloc,
-      // and if all users of the malloc trap, and if the malloc'd address is not
-      // put anywhere else, transform the program to use global memory instead
-      // of malloc'd memory.  This eliminates dynamic allocation (good) and
-      // exposes the resultant global to further GlobalOpt (even better).  Note
-      // that we restrict this transformation to only working on small
-      // allocations (2048 bytes currently), as we don't want to introduce a 16M
-      // global or something.
+      // transform the program to use global memory instead of malloc'd memory.
+      // This eliminates dynamic allocation, avoids an indirection accessing the
+      // data, and exposes the resultant global to further GlobalOpt.
       if (ConstantInt *NElements = dyn_cast<ConstantInt>(MI->getArraySize())) {
+        // Restrict this transformation to only working on small allocations
+        // (2048 bytes currently), as we don't want to introduce a 16M global or
+        // something.
         if (NElements->getRawValue()*
-                     TD.getTypeSize(MI->getAllocatedType()) < 2048 &&
-            AllUsesOfLoadedValueWillTrapIfNull(GV) &&
-            ValueIsOnlyUsedLocallyOrStoredToOneGlobal(MI, GV)) {
+                     TD.getTypeSize(MI->getAllocatedType()) < 2048) {
           GVI = OptimizeGlobalAddressOfMalloc(GV, MI);
           return true;
         }
       }
+
+      // If the allocation is an array of structures, consider transforming this
+      // into multiple malloc'd arrays, one for each field.  This is basically
+      // SRoA for malloc'd memory.
+      if (const StructType *AllocTy = 
+                  dyn_cast<StructType>(MI->getAllocatedType())) {
+        // If the structure has an unreasonable number of fields, leave it
+        // alone.
+        if (AllocTy->getNumElements() <= 16 && AllocTy->getNumElements() > 0 &&
+            GlobalLoadUsesSimpleEnoughForHeapSRA(GV)) {
+          GVI = PerformHeapAllocSRoA(GV, MI);
+          return true;
+        }
+      }
     }
   }
 






More information about the llvm-commits mailing list