[cfe-commits] r111081 - in /cfe/trunk: lib/Checker/CStringChecker.cpp test/Analysis/string.c

Jordy Rose jediknil at belkadan.com
Sat Aug 14 14:02:52 PDT 2010


Author: jrose
Date: Sat Aug 14 16:02:52 2010
New Revision: 111081

URL: http://llvm.org/viewvc/llvm-project?rev=111081&view=rev
Log:
Update CStringChecker to take advantage of the new metadata symbols and region change callback. Now does basic tracking of string length for general regions. Currently this is still only used for modeling strlen().

Modified:
    cfe/trunk/lib/Checker/CStringChecker.cpp
    cfe/trunk/test/Analysis/string.c

Modified: cfe/trunk/lib/Checker/CStringChecker.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Checker/CStringChecker.cpp?rev=111081&r1=111080&r2=111081&view=diff
==============================================================================
--- cfe/trunk/lib/Checker/CStringChecker.cpp (original)
+++ cfe/trunk/lib/Checker/CStringChecker.cpp Sat Aug 14 16:02:52 2010
@@ -15,6 +15,7 @@
 #include "GRExprEngineExperimentalChecks.h"
 #include "clang/Checker/BugReporter/BugType.h"
 #include "clang/Checker/PathSensitive/CheckerVisitor.h"
+#include "clang/Checker/PathSensitive/GRStateTrait.h"
 #include "llvm/ADT/StringSwitch.h"
 
 using namespace clang;
@@ -28,6 +29,15 @@
   static void *getTag() { static int tag; return &tag; }
 
   bool EvalCallExpr(CheckerContext &C, const CallExpr *CE);
+  void PreVisitDeclStmt(CheckerContext &C, const DeclStmt *DS);
+  void MarkLiveSymbols(const GRState *state, SymbolReaper &SR);
+  void EvalDeadSymbols(CheckerContext &C, SymbolReaper &SR);
+  bool WantsRegionChangeUpdate(const GRState *state);
+
+  const GRState *EvalRegionChanges(const GRState *state,
+                                   const MemRegion * const *Begin,
+                                   const MemRegion * const *End,
+                                   bool*);
 
   typedef void (CStringChecker::*FnCheck)(CheckerContext &, const CallExpr *);
 
@@ -46,7 +56,9 @@
   std::pair<const GRState*, const GRState*>
   AssumeZero(CheckerContext &C, const GRState *state, SVal V, QualType Ty);
 
-  SVal GetCStringLength(CheckerContext &C, const GRState *state,
+  SVal GetCStringLengthForRegion(CheckerContext &C, const GRState *&state,
+                                 const Expr *Ex, const MemRegion *MR);
+  SVal GetCStringLength(CheckerContext &C, const GRState *&state,
                         const Expr *Ex, SVal Buf);
 
   bool SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx,
@@ -67,8 +79,21 @@
   void EmitOverlapBug(CheckerContext &C, const GRState *state,
                       const Stmt *First, const Stmt *Second);
 };
+
+class CStringLength {
+public:
+  typedef llvm::ImmutableMap<const MemRegion *, SVal> EntryMap;
+};
 } //end anonymous namespace
 
+namespace clang {
+  template <>
+  struct GRStateTrait<CStringLength> 
+    : public GRStatePartialTrait<CStringLength::EntryMap> {
+    static void *GDMIndex() { return CStringChecker::getTag(); }
+  };
+}
+
 void clang::RegisterCStringChecker(GRExprEngine &Eng) {
   Eng.registerCheck(new CStringChecker());
 }
@@ -382,7 +407,26 @@
   C.EmitReport(report);
 }
 
-SVal CStringChecker::GetCStringLength(CheckerContext &C, const GRState *state,
+SVal CStringChecker::GetCStringLengthForRegion(CheckerContext &C,
+                                               const GRState *&state,
+                                               const Expr *Ex,
+                                               const MemRegion *MR) {
+  // If there's a recorded length, go ahead and return it.
+  const SVal *Recorded = state->get<CStringLength>(MR);
+  if (Recorded)
+    return *Recorded;
+  
+  // Otherwise, get a new symbol and update the state.
+  unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
+  ValueManager &ValMgr = C.getValueManager();
+  QualType SizeTy = ValMgr.getContext().getSizeType();
+  SVal Strlen = ValMgr.getMetadataSymbolVal(getTag(), MR, Ex, SizeTy, Count);
+  
+  state = state->set<CStringLength>(MR, Strlen);
+  return Strlen;
+}
+
+SVal CStringChecker::GetCStringLength(CheckerContext &C, const GRState *&state,
                                       const Expr *Ex, SVal Buf) {
   const MemRegion *MR = Buf.getAsRegion();
   if (!MR) {
@@ -390,8 +434,7 @@
     // C string. In the context of locations, the only time we can issue such
     // a warning is for labels.
     if (loc::GotoLabel *Label = dyn_cast<loc::GotoLabel>(&Buf)) {
-      ExplodedNode *N = C.GenerateSink(state);
-      if (N) {
+      if (ExplodedNode *N = C.GenerateNode(state)) {
         if (!BT_NotCString)
           BT_NotCString = new BuiltinBug("API",
             "Argument is not a null-terminated string.");
@@ -413,86 +456,65 @@
       return UndefinedVal();
     }
 
-    // If it's not a region and not a label, it may be a constant location,
-    // or it may be unknown. Just conjure a value as usual (see end of method).
-
-  } else {
-    // If we have a region, strip casts from it and see if we can figure out
-    // its length. For anything we can't figure out, just conjure a value as
-    // usual (see end of method).
-    MR = MR->StripCasts();
-
-    switch (MR->getKind()) {
-    case MemRegion::StringRegionKind: {
-      ValueManager &ValMgr = C.getValueManager();
-      ASTContext &Ctx = ValMgr.getContext();
-      const StringLiteral *Str = cast<StringRegion>(MR)->getStringLiteral();
-
-      // Non-constant string literals may have been changed, so only return a
-      // known value if we know the literal is constant.
-      if (Str->getType().isConstant(Ctx)) {
-        QualType SizeTy = Ctx.getSizeType();
-        return ValMgr.makeIntVal(Str->getByteLength(), SizeTy);        
-      }
-
-      // FIXME: Handle the non-constant case. For now, just treat it like any
-      // other initialized region.
-      // FALL-THROUGH
-    }
-    case MemRegion::SymbolicRegionKind:
-    case MemRegion::AllocaRegionKind:
-    case MemRegion::VarRegionKind:
-    case MemRegion::FieldRegionKind:
-    case MemRegion::ObjCIvarRegionKind:
-      // FIXME: These need to be tracked!
-      break;
-    case MemRegion::CompoundLiteralRegionKind:
-      // FIXME: Can we track this? Is it necessary?
-      break;
-    case MemRegion::ElementRegionKind:
-      // FIXME: How can we handle this? It's not good enough to subtract the
-      // offset from the base string length; consider "123\x00567" and &a[5].
-      break;
-    default: {
-      // Other regions (mostly non-data) can't have a reliable C string length.
-      // In this case, an error is emitted and UndefinedVal is returned.
-      // The caller should always be prepared to handle this case.
-      ExplodedNode *N = C.GenerateSink(state);
-      if (N) {
-        if (!BT_NotCString)
-          BT_NotCString = new BuiltinBug("API",
-            "Argument is not a null-terminated string.");
-
-        llvm::SmallString<120> buf;
-        llvm::raw_svector_ostream os(buf);
-
-        os << "Argument to byte string function is ";
-
-        if (SummarizeRegion(os, C.getASTContext(), MR)) {
-          os << ", which is not a null-terminated string";
-        } else {
-          os << "not a null-terminated string";
-        }
-
-        // Generate a report for this bug.
-        EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString,
-                                                          os.str(), N);
+    // If it's not a region and not a label, give up.
+    return UnknownVal();
+  }
 
-        report->addRange(Ex->getSourceRange());
-        C.EmitReport(report);        
-      }
+  // If we have a region, strip casts from it and see if we can figure out
+  // its length. For anything we can't figure out, just return UnknownVal.
+  MR = MR->StripCasts();
+
+  switch (MR->getKind()) {
+  case MemRegion::StringRegionKind: {
+    // Modifying the contents of string regions is undefined [C99 6.4.5p6],
+    // so we can assume that the byte length is the correct C string length.
+    ValueManager &ValMgr = C.getValueManager();
+    QualType SizeTy = ValMgr.getContext().getSizeType();
+    const StringLiteral *Str = cast<StringRegion>(MR)->getStringLiteral();
+    return ValMgr.makeIntVal(Str->getByteLength(), SizeTy);
+  }
+  case MemRegion::SymbolicRegionKind:
+  case MemRegion::AllocaRegionKind:
+  case MemRegion::VarRegionKind:
+  case MemRegion::FieldRegionKind:
+  case MemRegion::ObjCIvarRegionKind:
+    return GetCStringLengthForRegion(C, state, Ex, MR);
+  case MemRegion::CompoundLiteralRegionKind:
+    // FIXME: Can we track this? Is it necessary?
+    return UnknownVal();
+  case MemRegion::ElementRegionKind:
+    // FIXME: How can we handle this? It's not good enough to subtract the
+    // offset from the base string length; consider "123\x00567" and &a[5].
+    return UnknownVal();
+  default:
+    // Other regions (mostly non-data) can't have a reliable C string length.
+    // In this case, an error is emitted and UndefinedVal is returned.
+    // The caller should always be prepared to handle this case.
+    if (ExplodedNode *N = C.GenerateNode(state)) {
+      if (!BT_NotCString)
+        BT_NotCString = new BuiltinBug("API",
+          "Argument is not a null-terminated string.");
+
+      llvm::SmallString<120> buf;
+      llvm::raw_svector_ostream os(buf);
+
+      os << "Argument to byte string function is ";
+
+      if (SummarizeRegion(os, C.getASTContext(), MR))
+        os << ", which is not a null-terminated string";
+      else
+        os << "not a null-terminated string";
+
+      // Generate a report for this bug.
+      EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString,
+                                                        os.str(), N);
 
-      return UndefinedVal();
-    }
+      report->addRange(Ex->getSourceRange());
+      C.EmitReport(report);        
     }
-  }
 
-  // If we can't track a certain region's C string length, or if we can't get a
-  // region from the SVal, conjure a value, for use in later constraints.
-  unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
-  ValueManager &ValMgr = C.getValueManager();
-  QualType SizeTy = ValMgr.getContext().getSizeType();
-  return ValMgr.getConjuredSymbolVal(getTag(), Ex, SizeTy, Count);
+    return UndefinedVal();
+  }
 }
 
 bool CStringChecker::SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx,
@@ -664,19 +686,29 @@
   state = CheckNonNull(C, state, Arg, ArgVal);
 
   if (state) {
-    // Figure out what the length is, making sure the argument is a C string
-    // (or something similar to a C string). If the argument is valid, the
-    // length will be defined, and we can then set the return value.
     SVal StrLen = GetCStringLength(C, state, Arg, ArgVal);
-    if (!StrLen.isUndef()) {
-      state = state->BindExpr(CE, StrLen);
-      C.addTransition(state);
+
+    // If the argument isn't a valid C string, there's no valid state to
+    // transition to.
+    if (StrLen.isUndef())
+      return;
+
+    // If GetCStringLength couldn't figure out the length, conjure a return
+    // value, so it can be used in constraints, at least.
+    if (StrLen.isUnknown()) {
+      ValueManager &ValMgr = C.getValueManager();
+      unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
+      StrLen = ValMgr.getConjuredSymbolVal(NULL, CE, CE->getType(), Count);
     }
+
+    // Bind the return value.
+    state = state->BindExpr(CE, StrLen);
+    C.addTransition(state);
   }
 }
 
 //===----------------------------------------------------------------------===//
-// The driver method.
+// The driver method, and other Checker callbacks.
 //===----------------------------------------------------------------------===//
 
 bool CStringChecker::EvalCallExpr(CheckerContext &C, const CallExpr *CE) {
@@ -710,3 +742,129 @@
   (this->*EvalFunction)(C, CE);
   return true;
 }
+
+void CStringChecker::PreVisitDeclStmt(CheckerContext &C, const DeclStmt *DS) {
+  // Record string length for char a[] = "abc";
+  const GRState *state = C.getState();
+
+  for (DeclStmt::const_decl_iterator I = DS->decl_begin(), E = DS->decl_end();
+       I != E; ++I) {
+    const VarDecl *D = dyn_cast<VarDecl>(*I);
+    if (!D)
+      continue;
+
+    // FIXME: Handle array fields of structs.
+    if (!D->getType()->isArrayType())
+      continue;
+
+    const Expr *Init = D->getInit();
+    if (!Init)
+      continue;
+    if (!isa<StringLiteral>(Init))
+      continue;
+
+    Loc VarLoc = state->getLValue(D, C.getPredecessor()->getLocationContext());
+    const MemRegion *MR = VarLoc.getAsRegion();
+    if (!MR)
+      continue;
+
+    SVal StrVal = state->getSVal(Init);
+    assert(StrVal.isValid() && "Initializer string is unknown or undefined");
+    DefinedOrUnknownSVal StrLen
+      = cast<DefinedOrUnknownSVal>(GetCStringLength(C, state, Init, StrVal));
+
+    state = state->set<CStringLength>(MR, StrLen);
+  }
+
+  C.addTransition(state);
+}
+
+bool CStringChecker::WantsRegionChangeUpdate(const GRState *state) {
+  CStringLength::EntryMap Entries = state->get<CStringLength>();
+  return !Entries.isEmpty();
+}
+
+const GRState *CStringChecker::EvalRegionChanges(const GRState *state,
+                                                 const MemRegion * const *Begin,
+                                                 const MemRegion * const *End,
+                                                 bool *) {
+  CStringLength::EntryMap Entries = state->get<CStringLength>();
+  if (Entries.isEmpty())
+    return state;
+
+  llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
+  llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
+
+  // First build sets for the changed regions and their super-regions.
+  for ( ; Begin != End; ++Begin) {
+    const MemRegion *MR = *Begin;
+    Invalidated.insert(MR);
+
+    SuperRegions.insert(MR);
+    while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
+      MR = SR->getSuperRegion();
+      SuperRegions.insert(MR);
+    }
+  }
+
+  CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>();
+
+  // Then loop over the entries in the current state.
+  for (CStringLength::EntryMap::iterator I = Entries.begin(),
+       E = Entries.end(); I != E; ++I) {
+    const MemRegion *MR = I.getKey();
+
+    // Is this entry for a super-region of a changed region?
+    if (SuperRegions.count(MR)) {
+      Entries = F.Remove(Entries, MR);
+      continue;
+    }
+
+    // Is this entry for a sub-region of a changed region?
+    const MemRegion *Super = MR;
+    while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
+      Super = SR->getSuperRegion();
+      if (Invalidated.count(Super)) {
+        Entries = F.Remove(Entries, MR);
+        break;
+      }
+    }
+  }
+
+  return state->set<CStringLength>(Entries);
+}
+
+void CStringChecker::MarkLiveSymbols(const GRState *state, SymbolReaper &SR) {
+  // Mark all symbols in our string length map as valid.
+  CStringLength::EntryMap Entries = state->get<CStringLength>();
+
+  for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end();
+       I != E; ++I) {
+    SVal Len = I.getData();
+    if (SymbolRef Sym = Len.getAsSymbol())
+      SR.markInUse(Sym);
+  }
+}
+
+void CStringChecker::EvalDeadSymbols(CheckerContext &C, SymbolReaper &SR) {
+  if (!SR.hasDeadSymbols())
+    return;
+
+  const GRState *state = C.getState();
+  CStringLength::EntryMap Entries = state->get<CStringLength>();
+  if (Entries.isEmpty())
+    return;
+
+  CStringLength::EntryMap::Factory &F = state->get_context<CStringLength>();
+  for (CStringLength::EntryMap::iterator I = Entries.begin(), E = Entries.end();
+       I != E; ++I) {
+    SVal Len = I.getData();
+    if (SymbolRef Sym = Len.getAsSymbol()) {
+      if (SR.isDead(Sym))
+        Entries = F.Remove(Entries, I.getKey());
+    }
+  }
+
+  state = state->set<CStringLength>(Entries);
+  C.GenerateNode(state);
+}

Modified: cfe/trunk/test/Analysis/string.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/string.c?rev=111081&r1=111080&r2=111081&view=diff
==============================================================================
--- cfe/trunk/test/Analysis/string.c (original)
+++ cfe/trunk/test/Analysis/string.c Sat Aug 14 16:02:52 2010
@@ -1,7 +1,7 @@
-// RUN: %clang_cc1 -analyze -Wwrite-strings -analyzer-experimental-internal-checks -analyzer-check-objc-mem -analyzer-store=region -analyzer-experimental-checks -verify %s
-// RUN: %clang_cc1 -analyze -DUSE_BUILTINS -Wwrite-strings -analyzer-experimental-internal-checks -analyzer-check-objc-mem -analyzer-store=region -analyzer-experimental-checks -verify %s
-// RUN: %clang_cc1 -analyze -DVARIANT -Wwrite-strings -analyzer-experimental-internal-checks -analyzer-check-objc-mem -analyzer-store=region -analyzer-experimental-checks -verify %s
-// RUN: %clang_cc1 -analyze -DUSE_BUILTINS -DVARIANT -Wwrite-strings -analyzer-experimental-internal-checks -analyzer-check-objc-mem -analyzer-store=region -analyzer-experimental-checks -verify %s
+// RUN: %clang_cc1 -analyze -analyzer-experimental-internal-checks -analyzer-check-objc-mem -analyzer-store=region -analyzer-experimental-checks -verify %s
+// RUN: %clang_cc1 -analyze -DUSE_BUILTINS -analyzer-experimental-internal-checks -analyzer-check-objc-mem -analyzer-store=region -analyzer-experimental-checks -verify %s
+// RUN: %clang_cc1 -analyze -DVARIANT -analyzer-experimental-internal-checks -analyzer-check-objc-mem -analyzer-store=region -analyzer-experimental-checks -verify %s
+// RUN: %clang_cc1 -analyze -DUSE_BUILTINS -DVARIANT -analyzer-experimental-internal-checks -analyzer-check-objc-mem -analyzer-store=region -analyzer-experimental-checks -verify %s
 
 //===----------------------------------------------------------------------===
 // Declarations
@@ -35,17 +35,19 @@
 
 void strlen_constant0() {
   if (strlen("123") != 3)
-    (void)*(char*)0;
+    (void)*(char*)0; // no-warning
 }
 
 void strlen_constant1() {
   const char *a = "123";
   if (strlen(a) != 3)
-    (void)*(char*)0;
+    (void)*(char*)0; // no-warning
 }
 
 void strlen_constant2(char x) {
   char a[] = "123";
+  if (strlen(a) != 3)
+    (void)*(char*)0; // no-warning
   a[0] = x;
   if (strlen(a) != 3)
     (void)*(char*)0; // expected-warning{{null}}
@@ -63,3 +65,75 @@
 label:
   return strlen((char*)&&label); // expected-warning{{Argument to byte string function is the address of the label 'label', which is not a null-terminated string}}
 }
+
+void strlen_subregion() {
+  struct two_strings { char a[2], b[2] };
+  extern void use_two_strings(struct two_strings *);
+
+  struct two_strings z;
+  use_two_strings(&z);
+
+  size_t a = strlen(z.a);
+  z.b[0] = 5;
+  size_t b = strlen(z.a);
+  if (a == 0 && b != 0)
+    (void)*(char*)0; // expected-warning{{never executed}}
+
+  use_two_strings(&z);
+
+  size_t c = strlen(z.a);
+  if (a == 0 && c != 0)
+    (void)*(char*)0; // expected-warning{{null}}
+}
+
+extern void use_string(char *);
+void strlen_argument(char *x) {
+  size_t a = strlen(x);
+  size_t b = strlen(x);
+  if (a == 0 && b != 0)
+    (void)*(char*)0; // expected-warning{{never executed}}
+
+  use_string(x);
+
+  size_t c = strlen(x);
+  if (a == 0 && c != 0)
+    (void)*(char*)0; // expected-warning{{null}}  
+}
+
+extern char global_str[];
+void strlen_global() {
+  size_t a = strlen(global_str);
+  size_t b = strlen(global_str);
+  if (a == 0 && b != 0)
+    (void)*(char*)0; // expected-warning{{never executed}}
+
+  // Call a function with unknown effects, which should invalidate globals.
+  use_string(0);
+
+  size_t c = strlen(global_str);
+  if (a == 0 && c != 0)
+    (void)*(char*)0; // expected-warning{{null}}  
+}
+
+void strlen_indirect(char *x) {
+  size_t a = strlen(x);
+  char *p = x;
+  char **p2 = &p;
+  size_t b = strlen(x);
+  if (a == 0 && b != 0)
+    (void)*(char*)0; // expected-warning{{never executed}}
+
+  extern void use_string_ptr(char*const*);
+  use_string_ptr(p2);
+
+  size_t c = strlen(x);
+  if (a == 0 && c != 0)
+    (void)*(char*)0; // expected-warning{{null}}
+}
+
+void strlen_liveness(const char *x) {
+  if (strlen(x) < 5)
+    return;
+  if (strlen(x) < 5)
+    (void)*(char*)0; // no-warning
+}





More information about the cfe-commits mailing list