[llvm-commits] [PATCH] New pass: pointer (bounds) tracking!

Tue Jul 14 10:56:39 PDT 2009

On Jul 13, 2009, at 1:59 PM, Török Edwin wrote:
>
> I fixed this in pointertracking_notready.patch by checking that:
> - there is one and only one successor  of the branch that dominates  
> the
> target BB, excluding backedges
> -  all predecessors of the targetBB are dominated by that successor

There are still cases that this doesn't catch. For example, in this CFG:

   BranchBB
   /     \
OtherBB  |
    \     |
     \    |
       DomBB
         |
       TargetBB

BranchBB has exactly one successor (DomBB) which dominates TargetBB,
and all of TargetBB's predecessors (it only has one, DomBB) are
dominated by that successor. However, TargetBB is still reachable
from OtherBB, i.e. via the other edge out of BranchBB, so the branch
condition cannot be assumed to hold in TargetBB.

I think pointertracking1.patch can be checked in when you're ready;
you can continue to work on it after it's in the tree. Here are a
few comments on the pointertracking1.patch for now:

 > +
 > +#include "llvm/Analysis/LoopInfo.h"

FWIW, Loop and LoopInfo can now be forward-declared, so you can
declare them in this header and move the
#include "llvm/Analysis/LoopInfo.h" to PointerTracking.cpp.

 > +#include "llvm/Instructions.h"
 > +#include "llvm/Pass.h"
 > +#include "llvm/Support/PredIteratorCache.h"
 > +
 > +namespace llvm {
 > +  class DominatorTree;
 > +  class ScalarEvolution;
 > +  class SCEV;
 > +  class TargetData;
 > +
 > +  enum SolverResult {
 > +    AlwaysFalse,
 > +    AlwaysTrue,// always true, assuming pointer is not NULL,
 > +    // and it is not use-after-free

Please tidy up this comment.

 > +    Unknown // it can sometimes be true, sometimes false, or it is  
undecided
 > +  };
 > +
 > +  class PointerTracking : public FunctionPass {
 > +  public:
 > +    typedef ICmpInst::Predicate Predicate;
 > +    static char ID;
 > +    PointerTracking();
 > +
 > +    virtual bool doInitialization(Module &M);
 > +
 > +    // If this pointer directly points to an allocation, return
 > +    // the number of elements of type Ty allocated.
 > +    // Otherwise return CouldNotCompute.
 > +    // Since allocations can fail by returning NULL, the real  
element count
 > +    // for every allocation is either 0 or the value returned by  
this function.
 > +    const SCEV *getAllocationElementCount(Value *P) const;
 > +
 > +    // Same as getAllocationSize() but returns size in bytes.
 > +    // We consider one byte as 8 bits.
 > +    const SCEV *getAllocationSizeInBytes(Value *V) const;
 > +
 > +    // Given a Pointer, determine a base pointer of known size,  
and an offset
 > +    // therefrom.
 > +    // When unable to determine, sets Base to NULL, and Limit/ 
Offset to
 > +    // CouldNotCompute.
 > +    // BaseSize, and Offset are in bytes: Pointer == Base + Offset
 > +    void getPointerOffset(Value *Pointer, Value *&Base, const SCEV  
*& BaseSize,
 > +                          const SCEV *&Offset) const;
 > +
 > +    // Compares the 2 scalar evolution expressions according to  
predicate,
 > +    // and if it can prove that the result is always true or  
always false
 > +    // return AlwaysTrue/AlwaysFalse. Otherwise it returns Unknown.
 > +    enum SolverResult compareSCEV(const SCEV *A, Predicate Pred,  
const SCEV *B,
 > +                                  const Loop *L);
 > +
 > +    // Determines whether the condition LHS <Pred> RHS is sufficient
 > +    // for the condition A <Pred> B to hold.
 > +    // Currently only ULT/ULE is supported.
 > +    // This errs on the side of returning false.
 > +    bool conditionSufficient(const SCEV *LHS, Predicate Pred1,  
const SCEV *RHS,
 > +                             const SCEV *A, Predicate Pred2, const  
SCEV *B,
 > +                             const Loop *L);
 > +
 > +    // Determines whether Offset is known to be always in [0,  
Limit) bounds.
 > +    // This errs on the side of returning Unknown.
 > +    enum SolverResult checkLimits(const SCEV *Offset, const SCEV  
*Limit,
 > +                                  BasicBlock *BB);
 > +
 > +    virtual bool runOnFunction(Function &F);
 > +    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
 > +    void print(raw_ostream &OS, const Module* = 0) const;
 > +    virtual void print(std::ostream &OS, const Module* = 0) const;
 > +  private:
 > +    Function *FF;
 > +    TargetData *TD;
 > +    ScalarEvolution *SE;
 > +    LoopInfo *LI;
 > +    DominatorTree *DT;
 > +
 > +    Function *callocFunc;
 > +    Function *reallocFunc;
 > +    PredIteratorCache predCache;
 > +
 > +    SmallPtrSet<const SCEV*, 1> analyzing;
 > +
 > +    enum SolverResult isLoopGuardedBy(const Loop *L, Predicate Pred,
 > +                                      const SCEV *A, const SCEV  
*B) const;
 > +    static bool isMonotonic(const SCEV *S);
 > +    bool scevPositive(const SCEV *A, const Loop *L, bool  
strict=true) const;
 > +    bool conditionSufficient(Value *Cond, bool negated,
 > +                             const SCEV *A, Predicate Pred, const  
SCEV *B);
 > +    Value *getConditionToReach(BasicBlock *A,
 > +                               DomTreeNodeBase<BasicBlock> *B,
 > +                               bool &negated);
 > +    Value *getConditionToReach(BasicBlock *A,
 > +                               BasicBlock *B,
 > +                               bool &negated);
 > +    const SCEV *computeAllocationCount(Value *P, const Type *&Ty)  
const;
 > +    const SCEV *computeAllocationCountForType(Value *P, const Type  
*Ty) const;
 > +  };
 > +}
 > +#endif
 > +
 > diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/ 
LinkAllPasses.h
 > index 4891f24..c0cd766 100644
 > --- a/include/llvm/LinkAllPasses.h
 > +++ b/include/llvm/LinkAllPasses.h
 > @@ -20,6 +20,7 @@
 >  #include "llvm/Analysis/IntervalPartition.h"
 >  #include "llvm/Analysis/LoopVR.h"
 >  #include "llvm/Analysis/Passes.h"
 > +#include "llvm/Analysis/PointerTracking.h"
 >  #include "llvm/Analysis/PostDominators.h"
 >  #include "llvm/Analysis/ScalarEvolution.h"
 >  #include "llvm/Assembly/PrintModulePass.h"
 > @@ -136,6 +137,7 @@ namespace {
 >        (void)new llvm::FindUsedTypes();
 >        (void)new llvm::ScalarEvolution();
 >        (void)new llvm::LoopVR();
 > +      (void)new llvm::PointerTracking();
 >        ((llvm::Function*)0)->viewCFGOnly();
 >        llvm::AliasSetTracker X(*(llvm::AliasAnalysis*)0);
 >        X.add((llvm::Value*)0, 0);  // for -print-alias-sets
 > diff --git a/lib/Analysis/PointerTracking.cpp b/lib/Analysis/ 
PointerTracking.cpp
 > new file mode 100644
 > index 0000000..86773a2
 > --- /dev/null
 > +++ b/lib/Analysis/PointerTracking.cpp
 > @@ -0,0 +1,272 @@
 > +//===- PointerTracking.cpp - Pointer Bounds Tracking ------------ 
*- C++ -*-===//
 > +//
 > +//                     The LLVM Compiler Infrastructure
 > +//
 > +// This file is distributed under the University of Illinois Open  
Source
 > +// License. See LICENSE.TXT for details.
 > +//
 > +//===----------------------------------------------------------------------===//
 > +//
 > +// This file implements tracking of pointer bounds.
 > +//
 > +//===----------------------------------------------------------------------===//
 > +#include "llvm/Analysis/ConstantFolding.h"
 > +#include "llvm/Analysis/Dominators.h"
 > +#include "llvm/Analysis/LoopInfo.h"
 > +#include "llvm/Analysis/PointerTracking.h"
 > +#include "llvm/Analysis/ScalarEvolution.h"
 > +#include "llvm/Analysis/ScalarEvolutionExpressions.h"
 > +#include "llvm/Module.h"
 > +#include "llvm/Value.h"
 > +#include "llvm/Support/CallSite.h"
 > +#include "llvm/Support/InstIterator.h"
 > +#include "llvm/Support/raw_ostream.h"
 > +#include "llvm/Target/TargetData.h"
 > +
 > +namespace llvm {
 > +char PointerTracking::ID=0;
 > +PointerTracking::PointerTracking() : FunctionPass(&ID) {}
 > +
 > +bool PointerTracking::runOnFunction(Function &F) {
 > +  predCache.clear();
 > +  assert(analyzing.empty());
 > +  FF = &F;
 > +  TD = getAnalysisIfAvailable<TargetData>();
 > +  SE = &getAnalysis<ScalarEvolution>();
 > +  LI = &getAnalysis<LoopInfo>();
 > +  DT = &getAnalysis<DominatorTree>();
 > +  return false;
 > +}
 > +
 > +void PointerTracking::getAnalysisUsage(AnalysisUsage &AU) const {
 > +  AU.addRequiredTransitive<DominatorTree>();
 > +  AU.addRequiredTransitive<LoopInfo>();
 > +  AU.addRequiredTransitive<ScalarEvolution>();
 > +  AU.setPreservesAll();
 > +}
 > +
 > +bool PointerTracking::doInitialization(Module &M) {
 > +  const Type *PTy = PointerType::getUnqual(Type::Int8Ty);
 > +
 > +  // Find calloc(i64, i64) or calloc(i32, i32).
 > +  callocFunc = M.getFunction("calloc");
 > +  if (callocFunc) {
 > +    const FunctionType *Ty = callocFunc->getFunctionType();
 > +
 > +    std::vector<const Type*> args, args2;
 > +    args.push_back(Type::Int64Ty);
 > +    args.push_back(Type::Int64Ty);
 > +    args2.push_back(Type::Int32Ty);
 > +    args2.push_back(Type::Int32Ty);
 > +    const FunctionType *Calloc1Type =
 > +      FunctionType::get(PTy, args, false);
 > +    const FunctionType *Calloc2Type =
 > +      FunctionType::get(PTy, args2, false);
 > +    if (Ty != Calloc1Type && Ty != Calloc2Type)
 > +      callocFunc = 0; // Give up
 > +  }
 > +
 > +  // Find realloc(i8*, i64) or realloc(i8*, i32).
 > +  reallocFunc = M.getFunction("realloc");
 > +  if (reallocFunc) {
 > +    const FunctionType *Ty = reallocFunc->getFunctionType();
 > +    std::vector<const Type*> args, args2;
 > +    args.push_back(PTy);
 > +    args.push_back(Type::Int64Ty);
 > +    args2.push_back(PTy);
 > +    args2.push_back(Type::Int32Ty);
 > +
 > +    const FunctionType *Realloc1Type =
 > +      FunctionType::get(PTy, args, false);
 > +    const FunctionType *Realloc2Type =
 > +      FunctionType::get(PTy, args2, false);
 > +    if (Ty != Realloc1Type && Ty != Realloc2Type)
 > +      reallocFunc = 0; // Give up
 > +  }
 > +  return false;
 > +}
 > +
 > +// Calculates the number of elements allocated for pointer P,
 > +// the type of the element is stored in Ty.
 > +const SCEV *PointerTracking::computeAllocationCount(Value *P,
 > +                                                    const Type  
*&Ty) const {
 > +  Value *V = P->stripPointerCasts();
 > +  if (AllocationInst *AI = dyn_cast<AllocationInst>(V)) {
 > +    Value *arraySize = AI->getArraySize();
 > +    Ty = AI->getAllocatedType();
 > +    // arraySize elements of type Ty.
 > +    return SE->getSCEV(arraySize);
 > +  }
 > +
 > +  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
 > +    if (GV->hasInitializer()) {
 > +      // FIXME: should check for weak/non-overriddable symbol here?

GV->mayBeOverridden() is the check.

 > +      Constant *C = GV->getInitializer();
 > +      if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType 
())) {
 > +        Ty = ATy->getElementType();
 > +        return SE->getConstant(Type::Int32Ty, ATy->getNumElements 
());
 > +      }
 > +    }
 > +    Ty = GV->getType();
 > +    return SE->getConstant(Type::Int32Ty, 1);
 > +    //TODO: implement more tracking for globals
 > +  }
 > +
 > +  if (CallInst *CI = dyn_cast<CallInst>(V)) {
 > +    CallSite CS(CI);
 > +    Function *F = dyn_cast<Function>(CS.getCalledValue()- 
 >stripPointerCasts());
 > +    const Loop *L = LI->getLoopFor(CI->getParent());
 > +    if (F == callocFunc) {
 > +      Ty = Type::Int8Ty;
 > +      // calloc allocates arg0*arg1 bytes.
 > +      return SE->getSCEVAtScope(SE->getMulExpr(SE->getSCEV 
(CS.getArgument(0)),
 > +                                               SE->getSCEV 
(CS.getArgument(1))),
 > +                                L);
 > +    } else if (F == reallocFunc) {
 > +      Ty = Type::Int8Ty;
 > +      // realloc allocates arg1 bytes.
 > +      return SE->getSCEVAtScope(CS.getArgument(1), L);
 > +    }
 > +  }
 > +
 > +  return SE->getCouldNotCompute();
 > +}
 > +
 > +// Calculates the number of elements of type Ty allocated for P.
 > +const SCEV *PointerTracking::computeAllocationCountForType(Value *P,
 > +                                                           const  
Type *Ty)
 > +  const {
 > +    const Type *elementTy;
 > +    const SCEV *Count = computeAllocationCount(P, elementTy);
 > +    if (isa<SCEVCouldNotCompute>(Count))
 > +      return Count;
 > +    if (elementTy == Ty)
 > +      return Count;
 > +
 > +    unsigned want_bits = Ty->getPrimitiveSizeInBits();
 > +    unsigned have_bits = elementTy->getPrimitiveSizeInBits();
 > +    if (have_bits && want_bits) {
 > +      if (want_bits == have_bits)
 > +        return Count;
 > +      if (have_bits % want_bits) //fractional counts not possible
 > +          return SE->getCouldNotCompute();
 > +      return SE->getMulExpr(Count, SE->getConstant(Count->getType(),
 > +                                                   have_bits/ 
want_bits));
 > +    }

This doesn't handle odd-sized integer types like i1 and i2 correctly.
For these, getPrimitiveSizeInBits returns 1 and 2, respectively, but
both are typically allocated in memory in one byte. I appreciate that
you're trying to support the no-TargetData case as much as possible,
but I think the only thing that can be done is the elementTy == Ty
case above.

Also, FWIW, LLVM style tends to use more capital letters and fewer
underscores (e.g. WantBits and HaveBits rather than want_bits and
have_bits).

 > +
 > +    if (!TD) // need TargetData from this point forward
 > +      return SE->getCouldNotCompute();
 > +
 > +    uint64_t elementsize = TD->getTypeAllocSize(elementTy);
 > +    uint64_t wantsize = TD->getTypeStoreSize(Ty);
 > +    if (elementsize == wantsize)
 > +      return Count;
 > +    if (elementsize % wantsize) //fractional counts not possible
 > +      return SE->getCouldNotCompute();
 > +    return SE->getMulExpr(Count, SE->getConstant(Count->getType(),
 > +                                                 elementsize/ 
wantsize));
 > +}

Since you're doing elementsize/wantsize etc., both sizes need to be the
AllocSize here, not the StoreSize; i.e. wantsize should be computed with
TD->getTypeAllocSize(Ty) as well.

Dan