[cfe-commits] r109480 - in /cfe/trunk: lib/Checker/CStringChecker.cpp test/Analysis/string.c

Jordy Rose jediknil at belkadan.com
Mon Jul 26 18:37:32 PDT 2010


Author: jrose
Date: Mon Jul 26 20:37:31 2010
New Revision: 109480

URL: http://llvm.org/viewvc/llvm-project?rev=109480&view=rev
Log:
Groundwork for C string length tracking. Currently only handles the length of constant string literals, which is not too helpful, and only calls to strlen() are checked.

Added:
    cfe/trunk/test/Analysis/string.c
Modified:
    cfe/trunk/lib/Checker/CStringChecker.cpp

Modified: cfe/trunk/lib/Checker/CStringChecker.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Checker/CStringChecker.cpp?rev=109480&r1=109479&r2=109480&view=diff
==============================================================================
--- cfe/trunk/lib/Checker/CStringChecker.cpp (original)
+++ cfe/trunk/lib/Checker/CStringChecker.cpp Mon Jul 26 20:37:31 2010
@@ -21,10 +21,10 @@
 
 namespace {
 class CStringChecker : public CheckerVisitor<CStringChecker> {
-  BugType *BT_Null, *BT_Bounds, *BT_Overlap;
+  BugType *BT_Null, *BT_Bounds, *BT_Overlap, *BT_NotCString;
 public:
   CStringChecker()
-  : BT_Null(0), BT_Bounds(0), BT_Overlap(0) {}
+  : BT_Null(0), BT_Bounds(0), BT_Overlap(0), BT_NotCString(0) {}
   static void *getTag() { static int tag; return &tag; }
 
   bool EvalCallExpr(CheckerContext &C, const CallExpr *CE);
@@ -40,10 +40,19 @@
 
   void EvalMemcmp(CheckerContext &C, const CallExpr *CE);
 
+  void EvalStrlen(CheckerContext &C, const CallExpr *CE);
+
   // Utility methods
   std::pair<const GRState*, const GRState*>
   AssumeZero(CheckerContext &C, const GRState *state, SVal V, QualType Ty);
 
+  SVal GetCStringLength(CheckerContext &C, const GRState *state,
+                        const Expr *Ex, SVal Buf);
+
+  bool SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx,
+                       const MemRegion *MR);
+
+  // Re-usable checks
   const GRState *CheckNonNull(CheckerContext &C, const GRState *state,
                                const Expr *S, SVal l);
   const GRState *CheckLocation(CheckerContext &C, const GRState *state,
@@ -369,6 +378,162 @@
   C.EmitReport(report);
 }
 
+SVal CStringChecker::GetCStringLength(CheckerContext &C, const GRState *state,
+                                      const Expr *Ex, SVal Buf) {
+  const MemRegion *MR = Buf.getAsRegion();
+  if (!MR) {
+    // If we can't get a region, see if it's something we /know/ isn't a
+    // C string. In the context of locations, the only time we can issue such
+    // a warning is for labels.
+    if (loc::GotoLabel *Label = dyn_cast<loc::GotoLabel>(&Buf)) {
+      ExplodedNode *N = C.GenerateSink(state);
+      if (N) {
+        if (!BT_NotCString)
+          BT_NotCString = new BuiltinBug("API",
+            "Argument is not a null-terminated string.");
+
+        llvm::SmallString<120> buf;
+        llvm::raw_svector_ostream os(buf);
+        os << "Argument to byte string function is the address of the label '"
+           << Label->getLabel()->getID()->getName()
+           << "', which is not a null-terminated string";
+
+        // Generate a report for this bug.
+        EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString,
+                                                          os.str(), N);
+
+        report->addRange(Ex->getSourceRange());
+        C.EmitReport(report);        
+      }
+
+      return UndefinedVal();
+    }
+
+    // If it's not a region and not a label, it may be a constant location,
+    // or it may be unknown. Just conjure a value as usual (see end of method).
+
+  } else {
+    // If we have a region, strip casts from it and see if we can figure out
+    // its length. For anything we can't figure out, just conjure a value as
+    // usual (see end of method).
+    MR = MR->StripCasts();
+
+    switch (MR->getKind()) {
+    case MemRegion::StringRegionKind: {
+      ValueManager &ValMgr = C.getValueManager();
+      ASTContext &Ctx = ValMgr.getContext();
+      const StringLiteral *Str = cast<StringRegion>(MR)->getStringLiteral();
+
+      // Non-constant string literals may have been changed, so only return a
+      // known value if we know the literal is constant.
+      if (Str->getType().isConstant(Ctx)) {
+        QualType SizeTy = Ctx.getSizeType();
+        return ValMgr.makeIntVal(Str->getByteLength(), SizeTy);        
+      }
+
+      // FIXME: Handle the non-constant case. For now, just treat it like any
+      // other initialized region.
+      // FALL-THROUGH
+    }
+    case MemRegion::SymbolicRegionKind:
+    case MemRegion::AllocaRegionKind:
+    case MemRegion::VarRegionKind:
+    case MemRegion::FieldRegionKind:
+    case MemRegion::ObjCIvarRegionKind:
+      // FIXME: These need to be tracked!
+      break;
+    case MemRegion::CompoundLiteralRegionKind:
+      // FIXME: Can we track this? Is it necessary?
+      break;
+    case MemRegion::ElementRegionKind:
+      // FIXME: How can we handle this? It's not good enough to subtract the
+      // offset from the base string length; consider "123\x00567" and &a[5].
+      break;
+    default: {
+      // Other regions (mostly non-data) can't have a reliable C string length.
+      // In this case, an error is emitted and UndefinedVal is returned.
+      // The caller should always be prepared to handle this case.
+      ExplodedNode *N = C.GenerateSink(state);
+      if (N) {
+        if (!BT_NotCString)
+          BT_NotCString = new BuiltinBug("API",
+            "Argument is not a null-terminated string.");
+
+        llvm::SmallString<120> buf;
+        llvm::raw_svector_ostream os(buf);
+
+        os << "Argument to byte string function is ";
+
+        if (SummarizeRegion(os, C.getASTContext(), MR)) {
+          os << ", which is not a null-terminated string";
+        } else {
+          os << "not a null-terminated string";
+        }
+
+        // Generate a report for this bug.
+        EnhancedBugReport *report = new EnhancedBugReport(*BT_NotCString,
+                                                          os.str(), N);
+
+        report->addRange(Ex->getSourceRange());
+        C.EmitReport(report);        
+      }
+
+      return UndefinedVal();
+    }
+    }
+  }
+
+  // If we can't track a certain region's C string length, or if we can't get a
+  // region from the SVal, conjure a value, for use in later constraints.
+  unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
+  ValueManager &ValMgr = C.getValueManager();
+  QualType SizeTy = ValMgr.getContext().getSizeType();
+  return ValMgr.getConjuredSymbolVal(getTag(), Ex, SizeTy, Count);
+}
+
+bool CStringChecker::SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx,
+                                     const MemRegion *MR) {
+  const TypedRegion *TR = dyn_cast<TypedRegion>(MR);
+  if (!TR)
+    return false;
+
+  switch (TR->getKind()) {
+  case MemRegion::FunctionTextRegionKind: {
+    const FunctionDecl *FD = cast<FunctionTextRegion>(TR)->getDecl();
+    if (FD)
+      os << "the address of the function '" << FD << "'";
+    else
+      os << "the address of a function";
+    return true;
+  }
+  case MemRegion::BlockTextRegionKind:
+    os << "block text";
+    return true;
+  case MemRegion::BlockDataRegionKind:
+    os << "a block";
+    return true;
+  case MemRegion::CXXThisRegionKind:
+  case MemRegion::CXXObjectRegionKind:
+    os << "a C++ object of type "
+       << TR->getValueType(Ctx).getAsString();
+    return true;
+  case MemRegion::VarRegionKind:
+    os << "a variable of type"
+       << TR->getValueType(Ctx).getAsString();
+    return true;
+  case MemRegion::FieldRegionKind:
+    os << "a field of type "
+       << TR->getValueType(Ctx).getAsString();
+    return true;
+  case MemRegion::ObjCIvarRegionKind:
+    os << "an instance variable of type "
+       << TR->getValueType(Ctx).getAsString();
+    return true;
+  default:
+    return false;
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // Evaluation of individual function calls.
 //===----------------------------------------------------------------------===//
@@ -489,6 +654,27 @@
   }
 }
 
+void CStringChecker::EvalStrlen(CheckerContext &C, const CallExpr *CE) {
+  // size_t strlen(const char *s);
+  const GRState *state = C.getState();
+  const Expr *Arg = CE->getArg(0);
+  SVal ArgVal = state->getSVal(Arg);
+
+  // Check that the argument is non-null.
+  state = CheckNonNull(C, state, Arg, ArgVal);
+
+  if (state) {
+    // Figure out what the length is, making sure the argument is a C string
+    // (or something similar to a C string). If the argument is valid, the
+    // length will be defined, and we can then set the return value.
+    SVal StrLen = GetCStringLength(C, state, Arg, ArgVal);
+    if (!StrLen.isUndef()) {
+      state = state->BindExpr(CE, StrLen);
+      C.addTransition(state);
+    }
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // The driver method.
 //===----------------------------------------------------------------------===//
@@ -512,6 +698,7 @@
     .Cases("memcpy", "__memcpy_chk", &CStringChecker::EvalMemcpy)
     .Cases("memcmp", "bcmp", &CStringChecker::EvalMemcmp)
     .Cases("memmove", "__memmove_chk", &CStringChecker::EvalMemmove)
+    .Case("strlen", &CStringChecker::EvalStrlen)
     .Case("bcopy", &CStringChecker::EvalBcopy)
     .Default(NULL);
 

Added: cfe/trunk/test/Analysis/string.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/string.c?rev=109480&view=auto
==============================================================================
--- cfe/trunk/test/Analysis/string.c (added)
+++ cfe/trunk/test/Analysis/string.c Mon Jul 26 20:37:31 2010
@@ -0,0 +1,65 @@
+// RUN: %clang_cc1 -analyze -Wwrite-strings -analyzer-experimental-internal-checks -analyzer-check-objc-mem -analyzer-store=region -analyzer-experimental-checks -verify %s
+// RUN: %clang_cc1 -analyze -DUSE_BUILTINS -Wwrite-strings -analyzer-experimental-internal-checks -analyzer-check-objc-mem -analyzer-store=region -analyzer-experimental-checks -verify %s
+// RUN: %clang_cc1 -analyze -DVARIANT -Wwrite-strings -analyzer-experimental-internal-checks -analyzer-check-objc-mem -analyzer-store=region -analyzer-experimental-checks -verify %s
+// RUN: %clang_cc1 -analyze -DUSE_BUILTINS -DVARIANT -Wwrite-strings -analyzer-experimental-internal-checks -analyzer-check-objc-mem -analyzer-store=region -analyzer-experimental-checks -verify %s
+
+//===----------------------------------------------------------------------===
+// Declarations
+//===----------------------------------------------------------------------===
+
+// Some functions are so similar to each other that they follow the same code
+// path, such as memcpy and __memcpy_chk, or memcmp and bcmp. If VARIANT is
+// defined, make sure to use the variants instead to make sure they are still
+// checked by the analyzer.
+
+// Some functions are implemented as builtins. These should be #defined as
+// BUILTIN(f), which will prepend "__builtin_" if USE_BUILTINS is defined.
+
+// Functions that have variants and are also availabe as builtins should be
+// declared carefully! See memcpy() for an example.
+
+#ifdef USE_BUILTINS
+# define BUILTIN(f) __builtin_ ## f
+#else /* USE_BUILTINS */
+# define BUILTIN(f) f
+#endif /* USE_BUILTINS */
+
+typedef typeof(sizeof(int)) size_t;
+
+//===----------------------------------------------------------------------===
+// strlen()
+//===----------------------------------------------------------------------===
+
+#define strlen BUILTIN(strlen)
+size_t strlen(const char *s);
+
+void strlen_constant0() {
+  if (strlen("123") != 3)
+    (void)*(char*)0; // expected-warning{{never executed}}
+}
+
+void strlen_constant1() {
+  const char *a = "123";
+  if (strlen(a) != 3)
+    (void)*(char*)0; // expected-warning{{never executed}}
+}
+
+void strlen_constant2(char x) {
+  char a[] = "123";
+  a[0] = x;
+  if (strlen(a) != 3)
+    (void)*(char*)0; // expected-warning{{null}}
+}
+
+size_t strlen_null() {
+  return strlen(0); // expected-warning{{Null pointer argument in call to byte string function}}
+}
+
+size_t strlen_fn() {
+  return strlen((char*)&strlen_fn); // expected-warning{{Argument to byte string function is the address of the function 'strlen_fn', which is not a null-terminated string}}
+}
+
+size_t strlen_nonloc() {
+label:
+  return strlen((char*)&&label); // expected-warning{{Argument to byte string function is the address of the label 'label', which is not a null-terminated string}}
+}





More information about the cfe-commits mailing list