[cfe-dev] Allowing checkers to mark symbols as live

Zhongxing Xu xuzhongxing at gmail.com
Wed Aug 11 00:06:34 PDT 2010


Index: include/clang/Checker/PathSensitive/SymbolManager.h
===================================================================
--- include/clang/Checker/PathSensitive/SymbolManager.h	(revision 110696)
+++ include/clang/Checker/PathSensitive/SymbolManager.h	(working copy)
@@ -40,6 +40,7 @@
 public:
   enum Kind { BEGIN_SYMBOLS,
               RegionValueKind, ConjuredKind, DerivedKind, ExtentKind,
+              MetadataKind,
               END_SYMBOLS,
               SymIntKind, SymSymKind };
 private:
@@ -218,6 +219,47 @@
   }
 };

+class SymbolMetadata : public SymbolData {
+  const MemRegion* R;
+  const Stmt* S;
+  QualType T;
+  unsigned Count;
+  const void* Tag;
+public:
+  SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt* s, QualType t,
+                 unsigned count, const void* tag)
+  : SymbolData(MetadataKind, sym), R(r), S(s), T(t), Count(count), Tag(tag) {}
+
+  const MemRegion *getRegion() const { return R; }
+  const Stmt* getStmt() const { return S; }
+  unsigned getCount() const { return Count; }
+  const void* getTag() const { return Tag; }
+
+  QualType getType(ASTContext&) const;
+
+  void dumpToStream(llvm::raw_ostream &os) const;
+
+  static void Profile(llvm::FoldingSetNodeID& profile, const MemRegion *R,
+                      const Stmt *S, QualType T, unsigned Count,
+                      const void *Tag) {
+    profile.AddInteger((unsigned) ExtentKind);
+    profile.AddPointer(R);
+    profile.AddPointer(S);
+    profile.Add(T);
+    profile.AddInteger(Count);
+    profile.AddPointer(Tag);
+  }
+
+  virtual void Profile(llvm::FoldingSetNodeID& profile) {
+    Profile(profile, R, S, T, Count, Tag);
+  }
+
+  // Implement isa<T> support.
+  static inline bool classof(const SymExpr* SE) {
+    return SE->getKind() == MetadataKind;
+  }
+};
+
 // SymIntExpr - Represents symbolic expression like 'x' + 3.
 class SymIntExpr : public SymExpr {
   const SymExpr *LHS;
@@ -336,6 +378,10 @@

   const SymbolExtent *getExtentSymbol(const SubRegion *R);

+  const SymbolMetadata* getMetadataSymbol(const MemRegion* R, const Stmt* S,
+                                          QualType T, unsigned VisitCount,
+                                          const void* SymbolTag = 0);
+
   const SymIntExpr *getSymIntExpr(const SymExpr *lhs,
BinaryOperator::Opcode op,
                                   const llvm::APSInt& rhs, QualType t);

@@ -359,6 +405,7 @@
   typedef llvm::DenseSet<SymbolRef> SetTy;

   SetTy TheLiving;
+  SetTy MetadataInUse;
   SetTy TheDead;
   const LocationContext *LCtx;
   const Stmt *Loc;
@@ -374,13 +421,12 @@
   const Stmt *getCurrentStatement() const { return Loc; }

   bool isLive(SymbolRef sym);
-
+  bool isLive(const MemRegion *MR);
   bool isLive(const Stmt *ExprVal) const;
-
-  bool isLive(const VarRegion *VR) const;
-
+
   void markLive(SymbolRef sym);
+  void markInUse(const SymbolMetadata *sym);
   bool maybeDead(SymbolRef sym);

The above all looks good.

   typedef SetTy::const_iterator dead_iterator;
@@ -389,6 +435,10 @@
   bool hasDeadSymbols() const {
     return !TheDead.empty();
   }
+
+  bool isDead(SymbolRef sym) const {
+    return TheDead.count(sym);
+  }
 };

Yes, this API is also useful for MallocChecker.

 class SymbolVisitor {
Index: include/clang/Checker/PathSensitive/Checker.h
===================================================================
--- include/clang/Checker/PathSensitive/Checker.h	(revision 110696)
+++ include/clang/Checker/PathSensitive/Checker.h	(working copy)
@@ -266,6 +266,8 @@
   virtual void EvalEndPath(GREndPathNodeBuilder &B, void *tag,
                            GRExprEngine &Eng) {}

+  virtual void MarkLiveSymbols(const GRState *state, SymbolReaper
&SymReaper) {}
+
   virtual void VisitBranchCondition(GRBranchNodeBuilder &Builder,
                                     GRExprEngine &Eng,
                                     const Stmt *Condition, void *tag) {}
Index: include/clang/Checker/PathSensitive/ValueManager.h
===================================================================
--- include/clang/Checker/PathSensitive/ValueManager.h	(revision 110696)
+++ include/clang/Checker/PathSensitive/ValueManager.h	(working copy)
@@ -106,6 +106,9 @@
   DefinedOrUnknownSVal getDerivedRegionValueSymbolVal(SymbolRef parentSymbol,
                                                       const TypedRegion *R);

+  DefinedSVal getMetadataSymbolVal(const void *SymbolTag, const MemRegion *MR,
+                                   const Expr *E, QualType T, unsigned Count);
+
   DefinedSVal getFunctionPointer(const FunctionDecl *FD);

   DefinedSVal getBlockPointer(const BlockDecl *BD, CanQualType locTy,
Index: lib/Checker/SymbolManager.cpp
===================================================================
--- lib/Checker/SymbolManager.cpp	(revision 110696)
+++ lib/Checker/SymbolManager.cpp	(working copy)
@@ -78,6 +78,11 @@
   os << "extent_$" << getSymbolID() << '{' << getRegion() << '}';
 }

+void SymbolMetadata::dumpToStream(llvm::raw_ostream& os) const {
+  os << "meta_$" << getSymbolID() << '{'
+     << getRegion() << ',' << T.getAsString() << '}';
+}
+
 void SymbolRegionValue::dumpToStream(llvm::raw_ostream& os) const {
   os << "reg_$" << getSymbolID() << "<" << R << ">";
 }
@@ -150,6 +155,24 @@
   return cast<SymbolExtent>(SD);
 }

+const SymbolMetadata*
+SymbolManager::getMetadataSymbol(const MemRegion* R, const Stmt* S, QualType T,
+                                 unsigned Count, const void* SymbolTag) {
+
+  llvm::FoldingSetNodeID profile;
+  SymbolMetadata::Profile(profile, R, S, T, Count, SymbolTag);
+  void* InsertPos;
+  SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos);
+  if (!SD) {
+    SD = (SymExpr*) BPAlloc.Allocate<SymbolMetadata>();
+    new (SD) SymbolMetadata(SymbolCounter, R, S, T, Count, SymbolTag);
+    DataSet.InsertNode(SD, InsertPos);
+    ++SymbolCounter;
+  }
+
+  return cast<SymbolMetadata>(SD);
+}
+
 const SymIntExpr *SymbolManager::getSymIntExpr(const SymExpr *lhs,
                                                BinaryOperator::Opcode op,
                                                const llvm::APSInt& v,
@@ -198,6 +221,10 @@
   return Ctx.getSizeType();
 }

+QualType SymbolMetadata::getType(ASTContext&) const {
+  return T;
+}
+
 QualType SymbolRegionValue::getType(ASTContext& C) const {
   return R->getValueType(C);
 }
@@ -222,6 +249,10 @@
   TheDead.erase(sym);
 }

+void SymbolReaper::markInUse(const SymbolMetadata *sym) {
+  MetadataInUse.insert(sym);
+}
+
 bool SymbolReaper::maybeDead(SymbolRef sym) {
   if (isLive(sym))
     return false;
@@ -243,14 +274,22 @@
   }

   if (const SymbolExtent *extent = dyn_cast<SymbolExtent>(sym)) {
-    const MemRegion *Base = extent->getRegion()->getBaseRegion();
-    if (const VarRegion *VR = dyn_cast<VarRegion>(Base))
-      return isLive(VR);
-    if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Base))
-      return isLive(SR->getSymbol());
+    if (isLive(extent->getRegion())) {
+      markLive(sym);
+      return true;
+    }
     return false;
   }

+  if (const SymbolMetadata *metadata = dyn_cast<SymbolMetadata>(sym)) {
+    if (MetadataInUse.count(sym) && isLive(metadata->getRegion())) {
+      markLive(sym);
+      MetadataInUse.erase(sym);
+      return true;
+    }
+    return false;
+  }
+
   // Interogate the symbol.  It may derive from an input value to
   // the analyzed function/method.
   return isa<SymbolRegionValue>(sym);
@@ -260,13 +299,32 @@
   return LCtx->getLiveVariables()->isLive(Loc, ExprVal);
 }

-bool SymbolReaper::isLive(const VarRegion *VR) const {
-  const StackFrameContext *SFC = VR->getStackFrame();
+bool SymbolReaper::isLive(const MemRegion *MR) {
+  MR = MR->getBaseRegion();

-  if (SFC == LCtx->getCurrentStackFrame())
-    return LCtx->getLiveVariables()->isLive(Loc, VR->getDecl());
-  else
+  if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(MR))
+    return isLive(SR->getSymbol());
+
+  if (const VarRegion *VR = dyn_cast<VarRegion>(MR)) {
+    const StackFrameContext *SFC = VR->getStackFrame();
+
+    if (SFC == LCtx->getCurrentStackFrame())
+      return LCtx->getLiveVariables()->isLive(Loc, VR->getDecl());
+
     return SFC->isParentOf(LCtx->getCurrentStackFrame());
+  }
+
+  // FIXME: This is a gross over-approximation. What we really need is a way to
+  // tell if anything still refers to this region. Unlike SymbolicRegions,
+  // AllocaRegions don't have associated symbols, though, so we don't actually
+  // have a way to track their liveness.
+  if (isa<AllocaRegion>(MR))
+    return true;
+
+  if (isa<MemSpaceRegion>(MR))
+    return true;
+
+  return false;
 }

I don't think we should generalize the isLive(VarRegion*) interface to
judge on all MemRegion*. The old interface is clear on what it does
and it is the one needed by clients.

Also, the implementation here is incomplete. A metadata symbol may
associated with other kinds of regions not handled here, like
FieldRegion.

We need some way to collect all live regions. But that is too complex
to fit in this patch. So I suggest we keep the old interface. and
focus on adding metadata symbol in this patch.

 SymbolVisitor::~SymbolVisitor() {}
Index: lib/Checker/ValueManager.cpp
===================================================================
--- lib/Checker/ValueManager.cpp	(revision 110696)
+++ lib/Checker/ValueManager.cpp	(working copy)
@@ -117,7 +117,21 @@
   return nonloc::SymbolVal(sym);
 }

+DefinedSVal ValueManager::getMetadataSymbolVal(const void *SymbolTag,
+                                               const MemRegion *MR,
+                                               const Expr *E, QualType T,
+                                               unsigned Count) {
+  assert(SymbolManager::canSymbolicate(T) && "Invalid metadata symbol type");

+  SymbolRef sym = SymMgr.getMetadataSymbol(MR, E, T, Count, SymbolTag);
+
+  if (Loc::IsLocType(T))
+    return loc::MemRegionVal(MemMgr.getSymbolicRegion(sym));
+
+  return nonloc::SymbolVal(sym);
+}
+
+
 DefinedOrUnknownSVal
 ValueManager::getDerivedRegionValueSymbolVal(SymbolRef parentSymbol,
                                              const TypedRegion *R) {
Index: lib/Checker/GRExprEngine.cpp
===================================================================
--- lib/Checker/GRExprEngine.cpp	(revision 110696)
+++ lib/Checker/GRExprEngine.cpp	(working copy)
@@ -579,16 +579,24 @@
     Builder->setAuditor(BatchAuditor.get());

   // Create the cleaned state.
-  const ExplodedNode *BasePred = Builder->getBasePredecessor();
+  const LocationContext *LC = EntryNode->getLocationContext();
+  SymbolReaper SymReaper(LC, CurrentStmt, SymMgr);

-  SymbolReaper SymReaper(BasePred->getLocationContext(), CurrentStmt, SymMgr);
+  if (AMgr.shouldPurgeDead()) {
+    const GRState *St = EntryNode->getState();

-  CleanedState = AMgr.shouldPurgeDead()
-    ? StateMgr.RemoveDeadBindings(EntryNode->getState(),
-
BasePred->getLocationContext()->getCurrentStackFrame(),
-                                  SymReaper)
-    : EntryNode->getState();
+    for (CheckersOrdered::iterator I = Checkers.begin(), E = Checkers.end();
+         I != E; ++I) {
+      Checker *checker = I->second;
+      checker->MarkLiveSymbols(St, SymReaper);
+    }

+    const StackFrameContext *SFC = LC->getCurrentStackFrame();
+    CleanedState = StateMgr.RemoveDeadBindings(St, SFC, SymReaper);
+  } else {
+    CleanedState = EntryNode->getState();
+  }
+
   // Process any special transfer function for dead symbols.
   ExplodedNodeSet Tmp;



On Wed, Aug 11, 2010 at 12:51 PM, Jordy Rose <jediknil at belkadan.com> wrote:
> On Tue, 10 Aug 2010 18:03:20 +0800, Zhongxing Xu <xuzhongxing at gmail.com>
> wrote:
>> Just as your patch, before RemoveDeadBindings, checkers mark live
>> symbols, SymbolReaper cache them. When SymbolReaper is queried about
>> the liveness of those symbols, it first check if its associated region
>> is alive, if so, it return true. otherwise return false.
>
> Right, then. Here's a new version with an added SymbolMetadata that
> behaves like what you're describing. Is this what you meant?
>
> (Also attached, again: CStringChecker, to see the use of SymbolMetadata
> and added cleanup code in EvalDeadSymbols.)
>
> Note: isDead() could probably use a better name. The reason it's there is
> because this is about symbols stored in the GDM as values, which means we'd
> already have to iterate over the whole symbol map just to find a single
> dead symbol.
>
> What I'm not so happy about here is that the checker-writer could still
> arbitrarily markLive() random symbols, instead of being restricted to
> markInUse() (which is deliberately limited to metadata right now).
> Admittedly, that was the /only/ choice before, so this is probably still an
> improvement.



More information about the cfe-dev mailing list