[cfe-dev] Allowing checkers to mark symbols as live
Zhongxing Xu
xuzhongxing at gmail.com
Wed Aug 11 00:07:53 PDT 2010
Otherwise the patch looks good to me.
On Wed, Aug 11, 2010 at 3:06 PM, Zhongxing Xu <xuzhongxing at gmail.com> wrote:
> Index: include/clang/Checker/PathSensitive/SymbolManager.h
> ===================================================================
> --- include/clang/Checker/PathSensitive/SymbolManager.h (revision 110696)
> +++ include/clang/Checker/PathSensitive/SymbolManager.h (working copy)
> @@ -40,6 +40,7 @@
> public:
> enum Kind { BEGIN_SYMBOLS,
> RegionValueKind, ConjuredKind, DerivedKind, ExtentKind,
> + MetadataKind,
> END_SYMBOLS,
> SymIntKind, SymSymKind };
> private:
> @@ -218,6 +219,47 @@
> }
> };
>
> +class SymbolMetadata : public SymbolData {
> + const MemRegion* R;
> + const Stmt* S;
> + QualType T;
> + unsigned Count;
> + const void* Tag;
> +public:
> + SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt* s, QualType t,
> + unsigned count, const void* tag)
> + : SymbolData(MetadataKind, sym), R(r), S(s), T(t), Count(count), Tag(tag) {}
> +
> + const MemRegion *getRegion() const { return R; }
> + const Stmt* getStmt() const { return S; }
> + unsigned getCount() const { return Count; }
> + const void* getTag() const { return Tag; }
> +
> + QualType getType(ASTContext&) const;
> +
> + void dumpToStream(llvm::raw_ostream &os) const;
> +
> + static void Profile(llvm::FoldingSetNodeID& profile, const MemRegion *R,
> + const Stmt *S, QualType T, unsigned Count,
> + const void *Tag) {
> + profile.AddInteger((unsigned) ExtentKind);
> + profile.AddPointer(R);
> + profile.AddPointer(S);
> + profile.Add(T);
> + profile.AddInteger(Count);
> + profile.AddPointer(Tag);
> + }
> +
> + virtual void Profile(llvm::FoldingSetNodeID& profile) {
> + Profile(profile, R, S, T, Count, Tag);
> + }
> +
> + // Implement isa<T> support.
> + static inline bool classof(const SymExpr* SE) {
> + return SE->getKind() == MetadataKind;
> + }
> +};
> +
> // SymIntExpr - Represents symbolic expression like 'x' + 3.
> class SymIntExpr : public SymExpr {
> const SymExpr *LHS;
> @@ -336,6 +378,10 @@
>
> const SymbolExtent *getExtentSymbol(const SubRegion *R);
>
> + const SymbolMetadata* getMetadataSymbol(const MemRegion* R, const Stmt* S,
> + QualType T, unsigned VisitCount,
> + const void* SymbolTag = 0);
> +
> const SymIntExpr *getSymIntExpr(const SymExpr *lhs,
> BinaryOperator::Opcode op,
> const llvm::APSInt& rhs, QualType t);
>
> @@ -359,6 +405,7 @@
> typedef llvm::DenseSet<SymbolRef> SetTy;
>
> SetTy TheLiving;
> + SetTy MetadataInUse;
> SetTy TheDead;
> const LocationContext *LCtx;
> const Stmt *Loc;
> @@ -374,13 +421,12 @@
> const Stmt *getCurrentStatement() const { return Loc; }
>
> bool isLive(SymbolRef sym);
> -
> + bool isLive(const MemRegion *MR);
> bool isLive(const Stmt *ExprVal) const;
> -
> - bool isLive(const VarRegion *VR) const;
> -
> +
> void markLive(SymbolRef sym);
> + void markInUse(const SymbolMetadata *sym);
> bool maybeDead(SymbolRef sym);
>
> The above all looks good.
>
> typedef SetTy::const_iterator dead_iterator;
> @@ -389,6 +435,10 @@
> bool hasDeadSymbols() const {
> return !TheDead.empty();
> }
> +
> + bool isDead(SymbolRef sym) const {
> + return TheDead.count(sym);
> + }
> };
>
> Yes, this API is also useful for MallocChecker.
>
> class SymbolVisitor {
> Index: include/clang/Checker/PathSensitive/Checker.h
> ===================================================================
> --- include/clang/Checker/PathSensitive/Checker.h (revision 110696)
> +++ include/clang/Checker/PathSensitive/Checker.h (working copy)
> @@ -266,6 +266,8 @@
> virtual void EvalEndPath(GREndPathNodeBuilder &B, void *tag,
> GRExprEngine &Eng) {}
>
> + virtual void MarkLiveSymbols(const GRState *state, SymbolReaper
> &SymReaper) {}
> +
> virtual void VisitBranchCondition(GRBranchNodeBuilder &Builder,
> GRExprEngine &Eng,
> const Stmt *Condition, void *tag) {}
> Index: include/clang/Checker/PathSensitive/ValueManager.h
> ===================================================================
> --- include/clang/Checker/PathSensitive/ValueManager.h (revision 110696)
> +++ include/clang/Checker/PathSensitive/ValueManager.h (working copy)
> @@ -106,6 +106,9 @@
> DefinedOrUnknownSVal getDerivedRegionValueSymbolVal(SymbolRef parentSymbol,
> const TypedRegion *R);
>
> + DefinedSVal getMetadataSymbolVal(const void *SymbolTag, const MemRegion *MR,
> + const Expr *E, QualType T, unsigned Count);
> +
> DefinedSVal getFunctionPointer(const FunctionDecl *FD);
>
> DefinedSVal getBlockPointer(const BlockDecl *BD, CanQualType locTy,
> Index: lib/Checker/SymbolManager.cpp
> ===================================================================
> --- lib/Checker/SymbolManager.cpp (revision 110696)
> +++ lib/Checker/SymbolManager.cpp (working copy)
> @@ -78,6 +78,11 @@
> os << "extent_$" << getSymbolID() << '{' << getRegion() << '}';
> }
>
> +void SymbolMetadata::dumpToStream(llvm::raw_ostream& os) const {
> + os << "meta_$" << getSymbolID() << '{'
> + << getRegion() << ',' << T.getAsString() << '}';
> +}
> +
> void SymbolRegionValue::dumpToStream(llvm::raw_ostream& os) const {
> os << "reg_$" << getSymbolID() << "<" << R << ">";
> }
> @@ -150,6 +155,24 @@
> return cast<SymbolExtent>(SD);
> }
>
> +const SymbolMetadata*
> +SymbolManager::getMetadataSymbol(const MemRegion* R, const Stmt* S, QualType T,
> + unsigned Count, const void* SymbolTag) {
> +
> + llvm::FoldingSetNodeID profile;
> + SymbolMetadata::Profile(profile, R, S, T, Count, SymbolTag);
> + void* InsertPos;
> + SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos);
> + if (!SD) {
> + SD = (SymExpr*) BPAlloc.Allocate<SymbolMetadata>();
> + new (SD) SymbolMetadata(SymbolCounter, R, S, T, Count, SymbolTag);
> + DataSet.InsertNode(SD, InsertPos);
> + ++SymbolCounter;
> + }
> +
> + return cast<SymbolMetadata>(SD);
> +}
> +
> const SymIntExpr *SymbolManager::getSymIntExpr(const SymExpr *lhs,
> BinaryOperator::Opcode op,
> const llvm::APSInt& v,
> @@ -198,6 +221,10 @@
> return Ctx.getSizeType();
> }
>
> +QualType SymbolMetadata::getType(ASTContext&) const {
> + return T;
> +}
> +
> QualType SymbolRegionValue::getType(ASTContext& C) const {
> return R->getValueType(C);
> }
> @@ -222,6 +249,10 @@
> TheDead.erase(sym);
> }
>
> +void SymbolReaper::markInUse(const SymbolMetadata *sym) {
> + MetadataInUse.insert(sym);
> +}
> +
> bool SymbolReaper::maybeDead(SymbolRef sym) {
> if (isLive(sym))
> return false;
> @@ -243,14 +274,22 @@
> }
>
> if (const SymbolExtent *extent = dyn_cast<SymbolExtent>(sym)) {
> - const MemRegion *Base = extent->getRegion()->getBaseRegion();
> - if (const VarRegion *VR = dyn_cast<VarRegion>(Base))
> - return isLive(VR);
> - if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Base))
> - return isLive(SR->getSymbol());
> + if (isLive(extent->getRegion())) {
> + markLive(sym);
> + return true;
> + }
> return false;
> }
>
> + if (const SymbolMetadata *metadata = dyn_cast<SymbolMetadata>(sym)) {
> + if (MetadataInUse.count(sym) && isLive(metadata->getRegion())) {
> + markLive(sym);
> + MetadataInUse.erase(sym);
> + return true;
> + }
> + return false;
> + }
> +
> // Interogate the symbol. It may derive from an input value to
> // the analyzed function/method.
> return isa<SymbolRegionValue>(sym);
> @@ -260,13 +299,32 @@
> return LCtx->getLiveVariables()->isLive(Loc, ExprVal);
> }
>
> -bool SymbolReaper::isLive(const VarRegion *VR) const {
> - const StackFrameContext *SFC = VR->getStackFrame();
> +bool SymbolReaper::isLive(const MemRegion *MR) {
> + MR = MR->getBaseRegion();
>
> - if (SFC == LCtx->getCurrentStackFrame())
> - return LCtx->getLiveVariables()->isLive(Loc, VR->getDecl());
> - else
> + if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(MR))
> + return isLive(SR->getSymbol());
> +
> + if (const VarRegion *VR = dyn_cast<VarRegion>(MR)) {
> + const StackFrameContext *SFC = VR->getStackFrame();
> +
> + if (SFC == LCtx->getCurrentStackFrame())
> + return LCtx->getLiveVariables()->isLive(Loc, VR->getDecl());
> +
> return SFC->isParentOf(LCtx->getCurrentStackFrame());
> + }
> +
> + // FIXME: This is a gross over-approximation. What we really need is a way to
> + // tell if anything still refers to this region. Unlike SymbolicRegions,
> + // AllocaRegions don't have associated symbols, though, so we don't actually
> + // have a way to track their liveness.
> + if (isa<AllocaRegion>(MR))
> + return true;
> +
> + if (isa<MemSpaceRegion>(MR))
> + return true;
> +
> + return false;
> }
>
> I don't think we should generalize the isLive(VarRegion*) interface to
> judge on all MemRegion*. The old interface is clear on what it does
> and it is the one needed by clients.
>
> Also, the implementation here is incomplete. A metadata symbol may
> associated with other kinds of regions not handled here, like
> FieldRegion.
>
> We need some way to collect all live regions. But that is too complex
> to fit in this patch. So I suggest we keep the old interface. and
> focus on adding metadata symbol in this patch.
>
> SymbolVisitor::~SymbolVisitor() {}
> Index: lib/Checker/ValueManager.cpp
> ===================================================================
> --- lib/Checker/ValueManager.cpp (revision 110696)
> +++ lib/Checker/ValueManager.cpp (working copy)
> @@ -117,7 +117,21 @@
> return nonloc::SymbolVal(sym);
> }
>
> +DefinedSVal ValueManager::getMetadataSymbolVal(const void *SymbolTag,
> + const MemRegion *MR,
> + const Expr *E, QualType T,
> + unsigned Count) {
> + assert(SymbolManager::canSymbolicate(T) && "Invalid metadata symbol type");
>
> + SymbolRef sym = SymMgr.getMetadataSymbol(MR, E, T, Count, SymbolTag);
> +
> + if (Loc::IsLocType(T))
> + return loc::MemRegionVal(MemMgr.getSymbolicRegion(sym));
> +
> + return nonloc::SymbolVal(sym);
> +}
> +
> +
> DefinedOrUnknownSVal
> ValueManager::getDerivedRegionValueSymbolVal(SymbolRef parentSymbol,
> const TypedRegion *R) {
> Index: lib/Checker/GRExprEngine.cpp
> ===================================================================
> --- lib/Checker/GRExprEngine.cpp (revision 110696)
> +++ lib/Checker/GRExprEngine.cpp (working copy)
> @@ -579,16 +579,24 @@
> Builder->setAuditor(BatchAuditor.get());
>
> // Create the cleaned state.
> - const ExplodedNode *BasePred = Builder->getBasePredecessor();
> + const LocationContext *LC = EntryNode->getLocationContext();
> + SymbolReaper SymReaper(LC, CurrentStmt, SymMgr);
>
> - SymbolReaper SymReaper(BasePred->getLocationContext(), CurrentStmt, SymMgr);
> + if (AMgr.shouldPurgeDead()) {
> + const GRState *St = EntryNode->getState();
>
> - CleanedState = AMgr.shouldPurgeDead()
> - ? StateMgr.RemoveDeadBindings(EntryNode->getState(),
> -
> BasePred->getLocationContext()->getCurrentStackFrame(),
> - SymReaper)
> - : EntryNode->getState();
> + for (CheckersOrdered::iterator I = Checkers.begin(), E = Checkers.end();
> + I != E; ++I) {
> + Checker *checker = I->second;
> + checker->MarkLiveSymbols(St, SymReaper);
> + }
>
> + const StackFrameContext *SFC = LC->getCurrentStackFrame();
> + CleanedState = StateMgr.RemoveDeadBindings(St, SFC, SymReaper);
> + } else {
> + CleanedState = EntryNode->getState();
> + }
> +
> // Process any special transfer function for dead symbols.
> ExplodedNodeSet Tmp;
>
>
>
> On Wed, Aug 11, 2010 at 12:51 PM, Jordy Rose <jediknil at belkadan.com> wrote:
>> On Tue, 10 Aug 2010 18:03:20 +0800, Zhongxing Xu <xuzhongxing at gmail.com>
>> wrote:
>>> Just as your patch, before RemoveDeadBindings, checkers mark live
>>> symbols, SymbolReaper cache them. When SymbolReaper is queried about
>>> the liveness of those symbols, it first check if its associated region
>>> is alive, if so, it return true. otherwise return false.
>>
>> Right, then. Here's a new version with an added SymbolMetadata that
>> behaves like what you're describing. Is this what you meant?
>>
>> (Also attached, again: CStringChecker, to see the use of SymbolMetadata
>> and added cleanup code in EvalDeadSymbols.)
>>
>> Note: isDead() could probably use a better name. The reason it's there is
>> because this is about symbols stored in the GDM as values, which means we'd
>> already have to iterate over the whole symbol map just to find a single
>> dead symbol.
>>
>> What I'm not so happy about here is that the checker-writer could still
>> arbitrarily markLive() random symbols, instead of being restricted to
>> markInUse() (which is deliberately limited to metadata right now).
>> Admittedly, that was the /only/ choice before, so this is probably still an
>> improvement.
>
More information about the cfe-dev
mailing list