[cfe-dev] Symbolic Extents

Ted Kremenek kremenek at apple.com
Wed Jun 30 21:11:26 PDT 2010


On Jun 30, 2010, at 5:47 PM, Jordy Rose wrote:

> New version, with SymbolExtent! New API:
> 
> - SymbolExtent *SymbolManager::getExtentSymbol(const MemRegion *MR)
> - virtual SVal MemRegion::getExtent(ValueManager &ValMgr). This uses a
> ValueManager rather than just a SymbolManager because some types return
> plain integer values.
> - virtual const llvm::APSInt *SValuator::getKnownValue(SVal V). This can
> be used in more places, such as GRState::isEqual, but I'll save that for a
> separate commit.
> 
> Extent values are size_ts, since that's how they come in.
> RegionStore::getSizeInElements() still returns a signed value, but any code
> that eventually compares an extent to an index should use
> ValueManager::convertToArrayIndex() on the extent first.
> 
> I also pushed static bounds checking up to DeclRegion, rather than just
> VarRegion. This catches overflows in FieldRegions as well (with a case to
> handle the zero-length array idiom).
> 
> I wasn't sure how to implement the liveness tracking for SymbolExtent. We
> can't just say "SR.markLive(R->getExtent().getAsSymbol())" since that will
> create the extent symbol if it doesn't already exist. Any ideas?<symbolic-extents.patch>

Great work Jordy.  In response to your last point, instead of explicitly marking extent symbols live, how about we just define them as live if the corresponding region is live?  That is, put the smarts in SymbolManager::isLive(), and don't even bother marking them as live.

Here are comments on the patch:

Index: include/clang/Checker/PathSensitive/SymbolManager.h
===================================================================
--- include/clang/Checker/PathSensitive/SymbolManager.h	(revision 107347)
+++ include/clang/Checker/PathSensitive/SymbolManager.h	(working copy)
@@ -38,7 +38,7 @@
 class SymExpr : public llvm::FoldingSetNode {
 public:
   enum Kind { BEGIN_SYMBOLS,
-              RegionValueKind, ConjuredKind, DerivedKind,
+              RegionValueKind, ConjuredKind, DerivedKind, ExtentKind,
               END_SYMBOLS,
               SymIntKind, SymSymKind };
 private:
@@ -189,6 +189,34 @@
   }
 };
 
+class SymbolExtent : public SymbolData {
+  const MemRegion *R;
+  
+public:
+  SymbolExtent(SymbolID sym, const MemRegion *r)
+  : SymbolData(ExtentKind, sym), R(r) {}
+
+  const MemRegion *getRegion() const { return R; }
+
+  QualType getType(ASTContext&) const;
+
+  void dumpToStream(llvm::raw_ostream &os) const;
+
+  static void Profile(llvm::FoldingSetNodeID& profile, const MemRegion *R) {
+    profile.AddInteger((unsigned) ExtentKind);
+    profile.AddPointer(R);
+  }
+
+  virtual void Profile(llvm::FoldingSetNodeID& profile) {
+    Profile(profile, R);
+  }
+
+  // Implement isa<T> support.
+  static inline bool classof(const SymExpr* SE) {
+    return SE->getKind() == ExtentKind;
+  }
+};

Looks good to me.  One suggestion: should we restrict this to SubRegions instead of just MemRegions?  MemSpaceRegions probably don't have meaningful extents.

+
 // SymIntExpr - Represents symbolic expression like 'x' + 3.
 class SymIntExpr : public SymExpr {
   const SymExpr *LHS;
@@ -305,6 +333,8 @@
   const SymbolDerived *getDerivedSymbol(SymbolRef parentSymbol,
                                         const TypedRegion *R);
 
+  const SymbolExtent *getExtentSymbol(const MemRegion *R);
+
   const SymIntExpr *getSymIntExpr(const SymExpr *lhs, BinaryOperator::Opcode op,
                                   const llvm::APSInt& rhs, QualType t);
 
Index: include/clang/Checker/PathSensitive/MemRegion.h
===================================================================
--- include/clang/Checker/PathSensitive/MemRegion.h	(revision 107347)
+++ include/clang/Checker/PathSensitive/MemRegion.h	(working copy)
@@ -34,6 +34,7 @@
 class MemSpaceRegion;
 class LocationContext;
 class StackFrameContext;
+class ValueManager;
 class VarRegion;
 
 //===----------------------------------------------------------------------===//
@@ -106,6 +107,8 @@
   
   bool hasStackParametersStorage() const;
 
+  virtual SVal getExtent(ValueManager& ValMgr) const = 0;
+


Again, consider putting this in SubRegion, instead of MemRegion.



   virtual void dumpToStream(llvm::raw_ostream& os) const;
 
   void dump() const;
@@ -135,6 +138,8 @@
   MemRegionManager* getMemRegionManager() const { return Mgr; }
 
 public:
+  SVal getExtent(ValueManager& ValMgr) const { return UndefinedVal(); }
+

This isn't needed if we only allow SubRegions to have extents.


Index: include/clang/Checker/PathSensitive/SValuator.h
===================================================================
--- include/clang/Checker/PathSensitive/SValuator.h	(revision 107347)
+++ include/clang/Checker/PathSensitive/SValuator.h	(working copy)
@@ -52,6 +52,8 @@
 
   virtual SVal EvalBinOpLN(const GRState *state, BinaryOperator::Opcode Op,
                            Loc lhs, NonLoc rhs, QualType resultTy) = 0;
+
+  virtual const llvm::APSInt *getKnownValue(const GRState *state, SVal V) = 0;


Please add a doxygen comment here explaining what this method does.


   
   SVal EvalBinOp(const GRState *ST, BinaryOperator::Opcode Op,
                  SVal L, SVal R, QualType T);
Index: lib/Checker/SymbolManager.cpp
===================================================================
--- lib/Checker/SymbolManager.cpp	(revision 107347)
+++ lib/Checker/SymbolManager.cpp	(working copy)
@@ -74,6 +74,10 @@
      << getParentSymbol() << ',' << getRegion() << '}';
 }
 
+void SymbolExtent::dumpToStream(llvm::raw_ostream& os) const {
+  os << "extent_$" << getSymbolID() << '{' << getRegion() << '}';
+}
+

Looks good.


 void SymbolRegionValue::dumpToStream(llvm::raw_ostream& os) const {
   os << "reg_$" << getSymbolID() << "<" << R << ">";
 }
@@ -130,6 +134,22 @@
   return cast<SymbolDerived>(SD);
 }
 
+const SymbolExtent*
+SymbolManager::getExtentSymbol(const MemRegion *R) {
+  llvm::FoldingSetNodeID profile;
+  SymbolExtent::Profile(profile, R);
+  void* InsertPos;
+  SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos);
+  if (!SD) {
+    SD = (SymExpr*) BPAlloc.Allocate<SymbolExtent>();
+    new (SD) SymbolExtent(SymbolCounter, R);
+    DataSet.InsertNode(SD, InsertPos);
+    ++SymbolCounter;
+  }
+
+  return cast<SymbolExtent>(SD);
+}


Looks good.

+
 const SymIntExpr *SymbolManager::getSymIntExpr(const SymExpr *lhs,
                                                BinaryOperator::Opcode op,
                                                const llvm::APSInt& v,
@@ -170,11 +190,14 @@
   return T;
 }
 
-
 QualType SymbolDerived::getType(ASTContext& Ctx) const {
   return R->getValueType(Ctx);
 }
 
+QualType SymbolExtent::getType(ASTContext& Ctx) const {
+  return Ctx.getSizeType();
+}


Looks good.


+
 QualType SymbolRegionValue::getType(ASTContext& C) const {
   return R->getValueType(C);
 }
@@ -212,7 +235,8 @@
 
   // Interogate the symbol.  It may derive from an input value to
   // the analyzed function/method.
-  return isa<SymbolRegionValue>(sym);
+  // FIXME: Extents die with their regions.
+  return isa<SymbolRegionValue>(sym) || isa<SymbolExtent>(sym);
 }


See my comment above.  We can define a SymbolExtent as live if the region is live.  This involves changing:

  bool SymbolReaper::isLive(SymbolRef sym) {

to:

  bool SymbolReaper::isLive(const Stmt* Loc, SymbolRef sym) {

and then querying the region of SymbolExtent:

    if (const SymbolExtent *extent = dyn_cast<SymbolExtent>(sym)) {
	const SubRegion *R = extent->getRegion();
        while (R) {
          if (const VarRegion *VR = dyn_cast<VarRegion>(R))
            return isLive(Loc, VR);
          if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(R))
            return isLive(SR->getSymbol());
         R = dyn_cast<SubRegion>(R->getSuperRegion());
       }
       return false;
    }
 
 bool SymbolReaper::isLive(const Stmt* Loc, const Stmt* ExprVal) const {
Index: lib/Checker/MemRegion.cpp
===================================================================
--- lib/Checker/MemRegion.cpp	(revision 107347)
+++ lib/Checker/MemRegion.cpp	(working copy)
@@ -14,6 +14,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Checker/PathSensitive/MemRegion.h"
+#include "clang/Checker/PathSensitive/ValueManager.h"
 #include "clang/Analysis/AnalysisContext.h"
 #include "clang/Analysis/Support/BumpVector.h"
 #include "clang/AST/CharUnits.h"
@@ -171,6 +172,43 @@
 }
 
 //===----------------------------------------------------------------------===//
+// Region extents.
+//===----------------------------------------------------------------------===//
+
+SVal DeclRegion::getExtent(ValueManager& ValMgr) const {
+  ASTContext& Ctx = ValMgr.getContext();
+  QualType T = getDesugaredValueType(Ctx);
+
+  // FIXME: Handle variable-length arrays.
+  if (isa<VariableArrayType>(T))
+    return UnknownVal();
+
+  CharUnits Size = Ctx.getTypeSizeInChars(T);
+  // Ignore extents for non-indexable types like functions and void.
+  // As a side effect, this supports the "zero-length array" idiom, where the
+  // last element of a struct is declared as an array of length 0, then used to
+  // refer to extra memory allocated along with the struct.
+  if (Size.isZero())
+    return UnknownVal();

Should a Decl with size 0 have an unknown extent?

+
+  QualType SizeTy = Ctx.getSizeType();
+  return ValMgr.makeIntVal(Size.getQuantity(), SizeTy);
+}

Looks great.


+
+SVal AllocaRegion::getExtent(ValueManager& ValMgr) const {
+  return nonloc::SymbolVal(ValMgr.getSymbolManager().getExtentSymbol(this));
+}
+
+SVal SymbolicRegion::getExtent(ValueManager& ValMgr) const {
+  return nonloc::SymbolVal(ValMgr.getSymbolManager().getExtentSymbol(this));
+}
+
+SVal StringRegion::getExtent(ValueManager& ValMgr) const {
+  QualType SizeTy = ValMgr.getContext().getSizeType();
+  return ValMgr.makeIntVal(getStringLiteral()->getByteLength()+1, SizeTy);
+}

Looks great.


Index: lib/Checker/SimpleSValuator.cpp
===================================================================
--- lib/Checker/SimpleSValuator.cpp	(revision 107347)
+++ lib/Checker/SimpleSValuator.cpp	(working copy)
@@ -34,6 +34,8 @@
                            Loc lhs, Loc rhs, QualType resultTy);
   virtual SVal EvalBinOpLN(const GRState *state, BinaryOperator::Opcode op,
                            Loc lhs, NonLoc rhs, QualType resultTy);
+
+  virtual const llvm::APSInt *getKnownValue(const GRState *state, SVal V);


Please add doxygen comment here.

   
   SVal MakeSymIntVal(const SymExpr *LHS, BinaryOperator::Opcode op,
                      const llvm::APSInt &RHS, QualType resultTy);
@@ -819,3 +821,21 @@
   return state->getStateManager().getStoreManager().EvalBinOp(op, lhs,
                                                               rhs, resultTy);
 }
+
+const llvm::APSInt *SimpleSValuator::getKnownValue(const GRState *state,
+                                                   SVal V) {
+  if (V.isUnknownOrUndef())
+    return NULL;
+
+  if (loc::ConcreteInt* X = dyn_cast<loc::ConcreteInt>(&V))
+    return &X->getValue();
+
+  if (nonloc::ConcreteInt* X = dyn_cast<nonloc::ConcreteInt>(&V))
+    return &X->getValue();
+
+  if (SymbolRef Sym = V.getAsSymbol())
+    return state->getSymVal(Sym);
+
+  // FIXME: Add support for SymExprs.
+  return NULL;
+}

Looks great.


Index: lib/Checker/RegionStore.cpp
===================================================================
--- lib/Checker/RegionStore.cpp	(revision 107347)
+++ lib/Checker/RegionStore.cpp	(working copy)
@@ -372,18 +372,6 @@
   // Region "extents".
   //===------------------------------------------------------------------===//
 
-  const GRState *setExtent(const GRState *state,const MemRegion* R,SVal Extent){
-    return state->set<RegionExtents>(R, Extent);
-  }
-
-  Optional<SVal> getExtent(const GRState *state, const MemRegion *R) {
-    const SVal *V = state->get<RegionExtents>(R);
-    if (V)
-      return *V;
-    else
-      return Optional<SVal>();
-  }
-

I'm so happy to see this.  In addition, this code can go:

> namespace { class RegionExtents {}; }
> static int RegionExtentsIndex = 0;
> namespace clang {
>   template<> struct GRStateTrait<RegionExtents>
>     : public GRStatePartialTrait<llvm::ImmutableMap<const MemRegion*, SVal> > {
>     static void* GDMIndex() { return &RegionExtentsIndex; }
>   };
> }




   DefinedOrUnknownSVal getSizeInElements(const GRState *state,
                                          const MemRegion* R, QualType EleTy);
 
@@ -744,87 +732,19 @@
 DefinedOrUnknownSVal RegionStoreManager::getSizeInElements(const GRState *state,
                                                            const MemRegion *R,
                                                            QualType EleTy) {
+  SVal Size = R->getExtent(ValMgr);
+  SValuator &SVator = ValMgr.getSValuator();
+  const llvm::APSInt *SizeInt = SVator.getKnownValue(state, Size);
+  if (!SizeInt)
+    return UnknownVal();
 
-  switch (R->getKind()) {
-    case MemRegion::CXXThisRegionKind:
-      assert(0 && "Cannot get size of 'this' region");
-    case MemRegion::GenericMemSpaceRegionKind:
-    case MemRegion::StackLocalsSpaceRegionKind:
-    case MemRegion::StackArgumentsSpaceRegionKind:
-    case MemRegion::HeapSpaceRegionKind:
-    case MemRegion::GlobalsSpaceRegionKind:
-    case MemRegion::UnknownSpaceRegionKind:
-      assert(0 && "Cannot index into a MemSpace");
-      return UnknownVal();
+  CharUnits RegionSize = CharUnits::fromQuantity(SizeInt->getSExtValue());
+  CharUnits EleSize = getContext().getTypeSizeInChars(EleTy);
 
-    case MemRegion::FunctionTextRegionKind:
-    case MemRegion::BlockTextRegionKind:
-    case MemRegion::BlockDataRegionKind:
-      // Technically this can happen if people do funny things with casts.
-      return UnknownVal();
-
-      // Not yet handled.
-    case MemRegion::AllocaRegionKind:
-    case MemRegion::CompoundLiteralRegionKind:
-    case MemRegion::ElementRegionKind:
-    case MemRegion::FieldRegionKind:
-    case MemRegion::ObjCIvarRegionKind:
-    case MemRegion::CXXObjectRegionKind:
-      return UnknownVal();
-
-    case MemRegion::SymbolicRegionKind: {
-      const SVal *Size = state->get<RegionExtents>(R);
-      if (!Size)
-        return UnknownVal();
-      const nonloc::ConcreteInt *CI = dyn_cast<nonloc::ConcreteInt>(Size);
-      if (!CI)
-        return UnknownVal();
-
-      CharUnits RegionSize =
-        CharUnits::fromQuantity(CI->getValue().getSExtValue());
-      CharUnits EleSize = getContext().getTypeSizeInChars(EleTy);
-      assert(RegionSize % EleSize == 0);
-
-      return ValMgr.makeIntVal(RegionSize / EleSize, false);
-    }
-
-    case MemRegion::StringRegionKind: {
-      const StringLiteral* Str = cast<StringRegion>(R)->getStringLiteral();
-      // We intentionally made the size value signed because it participates in
-      // operations with signed indices.
-      return ValMgr.makeIntVal(Str->getByteLength()+1, false);
-    }
-
-    case MemRegion::VarRegionKind: {
-      const VarRegion* VR = cast<VarRegion>(R);
-      ASTContext& Ctx = getContext();
-      // Get the type of the variable.
-      QualType T = VR->getDesugaredValueType(Ctx);
-
-      // FIXME: Handle variable-length arrays.
-      if (isa<VariableArrayType>(T))
-        return UnknownVal();
-
-      CharUnits EleSize = Ctx.getTypeSizeInChars(EleTy);
-
-      if (const ConstantArrayType* CAT = dyn_cast<ConstantArrayType>(T)) {
-        // return the size as signed integer.
-        CharUnits RealEleSize = Ctx.getTypeSizeInChars(CAT->getElementType());
-        CharUnits::QuantityType EleRatio = RealEleSize / EleSize;
-        int64_t Length = CAT->getSize().getSExtValue();
-        return ValMgr.makeIntVal(Length * EleRatio, false);
-      }
-
-      // Clients can reinterpret ordinary variables as arrays, possibly of
-      // another type. The width is rounded down to ensure that an access is
-      // entirely within bounds.
-      CharUnits VarSize = Ctx.getTypeSizeInChars(T);
-      return ValMgr.makeIntVal(VarSize / EleSize, false);
-    }
-  }
-
-  assert(0 && "Unreachable");
-  return UnknownVal();
+  // If a variable is reinterpreted as a type that doesn't fit into a larger
+  // type evenly, round it down.
+  // This is a signed value, since it's used in arithmetic with signed indices.
+  return ValMgr.makeIntVal(RegionSize / EleSize, false);
 }


Look at that shrinkage!  Awesome.

 
 //===----------------------------------------------------------------------===//
Index: lib/Checker/MallocChecker.cpp
===================================================================
--- lib/Checker/MallocChecker.cpp	(revision 107347)
+++ lib/Checker/MallocChecker.cpp	(working copy)
@@ -172,15 +172,24 @@
   unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
   ValueManager &ValMgr = C.getValueManager();
 
+  // Set the return value.
   SVal RetVal = ValMgr.getConjuredSymbolVal(NULL, CE, CE->getType(), Count);
+  state = state->BindExpr(CE, RetVal);
 
-  state = C.getEngine().getStoreManager().setExtent(state, RetVal.getAsRegion(),
-                                                    Size);
-
+  // Fill the region with the initialization value.
   state = state->bindDefault(RetVal, Init);
 
-  state = state->BindExpr(CE, RetVal);
-  
+  // Set the region's extent equal to the Size parameter.
+  const MemRegion *R = RetVal.getAsRegion();
+  DefinedOrUnknownSVal Extent =
+    cast<DefinedOrUnknownSVal>(R->getExtent(ValMgr));
+  DefinedOrUnknownSVal DefinedSize = cast<DefinedOrUnknownSVal>(Size);
+
+  SValuator &SVator = ValMgr.getSValuator();
+  DefinedOrUnknownSVal ExtentMatchesSize =
+    SVator.EvalEQ(state, Extent, DefinedSize);
+  state = state->Assume(ExtentMatchesSize, true);
+

Looks good.


   SymbolRef Sym = RetVal.getAsLocSymbol();
   assert(Sym);
   // Set the symbol's state to Allocated.
Index: lib/Checker/CastSizeChecker.cpp
===================================================================
--- lib/Checker/CastSizeChecker.cpp	(revision 107347)
+++ lib/Checker/CastSizeChecker.cpp	(working copy)
@@ -44,7 +44,8 @@
 
   QualType ToPointeeTy = ToPTy->getPointeeType();
 
-  const MemRegion *R = C.getState()->getSVal(E).getAsRegion();
+  const GRState *state = C.getState();
+  const MemRegion *R = state->getSVal(E).getAsRegion();
   if (R == 0)
     return;
 
@@ -52,19 +53,18 @@
   if (SR == 0)
     return;
 
-  llvm::Optional<SVal> V = 
-                    C.getEngine().getStoreManager().getExtent(C.getState(), SR);
-  if (!V)
-    return;
+  ValueManager &ValMgr = C.getValueManager();
+  SVal Extent = R->getExtent(ValMgr);
 
-  const nonloc::ConcreteInt *CI = dyn_cast<nonloc::ConcreteInt>(V);
-  if (!CI)
+  SValuator &SVator = ValMgr.getSValuator();
+  const llvm::APSInt *ExtentInt = SVator.getKnownValue(state, Extent);
+  if (!ExtentInt)
     return;
 
-  CharUnits RegionSize = CharUnits::fromQuantity(CI->getValue().getSExtValue());
+  CharUnits RegionSize = CharUnits::fromQuantity(ExtentInt->getSExtValue());
   CharUnits TypeSize = C.getASTContext().getTypeSizeInChars(ToPointeeTy);
   
-  // void, and a few other un-sizeable types
+  // Ignore void, and a few other un-sizeable types.
   if (TypeSize.isZero())
     return;
   

Looks great.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-dev/attachments/20100630/d02d8a8d/attachment.html>


More information about the cfe-dev mailing list