[cfe-commits] r111120 - in /cfe/trunk: lib/Checker/CStringChecker.cpp test/Analysis/bstring.c test/Analysis/string.c

Jordy Rose jediknil at belkadan.com
Mon Aug 16 00:51:42 PDT 2010


Author: jrose
Date: Mon Aug 16 02:51:42 2010
New Revision: 111120

URL: http://llvm.org/viewvc/llvm-project?rev=111120&view=rev
Log:
Model the effects of strcpy() and stpcpy() in CStringChecker. Other changes:
- Fix memcpy() and friends to actually invalidate the destination buffer.
- Emit a different message for out-of-bounds buffer accesses if the buffer is being written to.
- When conjuring symbols, let ValueManager figure out the type.

Modified:
    cfe/trunk/lib/Checker/CStringChecker.cpp
    cfe/trunk/test/Analysis/bstring.c
    cfe/trunk/test/Analysis/string.c

Modified: cfe/trunk/lib/Checker/CStringChecker.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Checker/CStringChecker.cpp?rev=111120&r1=111119&r2=111120&view=diff
==============================================================================
--- cfe/trunk/lib/Checker/CStringChecker.cpp (original)
+++ cfe/trunk/lib/Checker/CStringChecker.cpp Mon Aug 16 02:51:42 2010
@@ -22,10 +22,11 @@
 
 namespace {
 class CStringChecker : public CheckerVisitor<CStringChecker> {
-  BugType *BT_Null, *BT_Bounds, *BT_Overlap, *BT_NotCString;
+  BugType *BT_Null, *BT_Bounds, *BT_BoundsWrite, *BT_Overlap, *BT_NotCString;
 public:
   CStringChecker()
-  : BT_Null(0), BT_Bounds(0), BT_Overlap(0), BT_NotCString(0) {}
+  : BT_Null(0), BT_Bounds(0), BT_BoundsWrite(0), BT_Overlap(0), BT_NotCString(0)
+  {}
   static void *getTag() { static int tag; return &tag; }
 
   bool EvalCallExpr(CheckerContext &C, const CallExpr *CE);
@@ -52,15 +53,24 @@
 
   void EvalStrlen(CheckerContext &C, const CallExpr *CE);
 
+  void EvalStrcpy(CheckerContext &C, const CallExpr *CE);
+  void EvalStpcpy(CheckerContext &C, const CallExpr *CE);
+  void EvalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool ReturnEnd);
+
   // Utility methods
   std::pair<const GRState*, const GRState*>
   AssumeZero(CheckerContext &C, const GRState *state, SVal V, QualType Ty);
 
+  const GRState *SetCStringLength(const GRState *state, const MemRegion *MR,
+                                  SVal StrLen);
   SVal GetCStringLengthForRegion(CheckerContext &C, const GRState *&state,
                                  const Expr *Ex, const MemRegion *MR);
   SVal GetCStringLength(CheckerContext &C, const GRState *&state,
                         const Expr *Ex, SVal Buf);
 
+  const GRState *InvalidateBuffer(CheckerContext &C, const GRState *state,
+                                  const Expr *Ex, SVal V);
+
   bool SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx,
                        const MemRegion *MR);
 
@@ -68,11 +78,13 @@
   const GRState *CheckNonNull(CheckerContext &C, const GRState *state,
                                const Expr *S, SVal l);
   const GRState *CheckLocation(CheckerContext &C, const GRState *state,
-                               const Expr *S, SVal l);
+                               const Expr *S, SVal l,
+                               bool IsDestination = false);
   const GRState *CheckBufferAccess(CheckerContext &C, const GRState *state,
                                    const Expr *Size,
                                    const Expr *FirstBuf,
-                                   const Expr *SecondBuf = NULL);
+                                   const Expr *SecondBuf = NULL,
+                                   bool FirstIsDestination = false);
   const GRState *CheckOverlap(CheckerContext &C, const GRState *state,
                               const Expr *Size, const Expr *First,
                               const Expr *Second);
@@ -156,7 +168,8 @@
 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
 const GRState *CStringChecker::CheckLocation(CheckerContext &C,
                                              const GRState *state,
-                                             const Expr *S, SVal l) {
+                                             const Expr *S, SVal l,
+                                             bool IsDestination) {
   // If a previous check has failed, propagate the failure.
   if (!state)
     return NULL;
@@ -189,17 +202,26 @@
     if (!N)
       return NULL;
 
-    if (!BT_Bounds)
-      BT_Bounds = new BuiltinBug("Out-of-bound array access",
-        "Byte string function accesses out-of-bound array element "
-        "(buffer overflow)");
+    BuiltinBug *BT;
+    if (IsDestination) {
+      if (!BT_BoundsWrite) {
+        BT_BoundsWrite = new BuiltinBug("Out-of-bound array access",
+          "Byte string function overflows destination buffer");
+      }
+      BT = static_cast<BuiltinBug*>(BT_BoundsWrite);
+    } else {
+      if (!BT_Bounds) {
+        BT_Bounds = new BuiltinBug("Out-of-bound array access",
+          "Byte string function accesses out-of-bound array element");
+      }
+      BT = static_cast<BuiltinBug*>(BT_Bounds);
+    }
 
     // FIXME: It would be nice to eventually make this diagnostic more clear,
     // e.g., by referencing the original declaration or by saying *why* this
     // reference is outside the range.
 
     // Generate a report for this bug.
-    BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds);
     RangedBugReport *report = new RangedBugReport(*BT, BT->getDescription(), N);
 
     report->addRange(S->getSourceRange());
@@ -216,7 +238,8 @@
                                                  const GRState *state,
                                                  const Expr *Size,
                                                  const Expr *FirstBuf,
-                                                 const Expr *SecondBuf) {
+                                                 const Expr *SecondBuf,
+                                                 bool FirstIsDestination) {
   // If a previous check has failed, propagate the failure.
   if (!state)
     return NULL;
@@ -250,7 +273,7 @@
   if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
     SVal BufEnd = SV.EvalBinOpLN(state, BinaryOperator::Add, *BufLoc,
                                  LastOffset, PtrTy);
-    state = CheckLocation(C, state, FirstBuf, BufEnd);
+    state = CheckLocation(C, state, FirstBuf, BufEnd, FirstIsDestination);
 
     // If the buffer isn't large enough, abort.
     if (!state)
@@ -407,6 +430,42 @@
   C.EmitReport(report);
 }
 
+const GRState *CStringChecker::SetCStringLength(const GRState *state,
+                                                const MemRegion *MR,
+                                                SVal StrLen) {
+  assert(!StrLen.isUndef() && "Attempt to set an undefined string length");
+  if (StrLen.isUnknown())
+    return state;
+
+  MR = MR->StripCasts();
+
+  switch (MR->getKind()) {
+  case MemRegion::StringRegionKind:
+    // FIXME: This can happen if we strcpy() into a string region. This is
+    // undefined [C99 6.4.5p6], but we should still warn about it.
+    return state;
+
+  case MemRegion::SymbolicRegionKind:
+  case MemRegion::AllocaRegionKind:
+  case MemRegion::VarRegionKind:
+  case MemRegion::FieldRegionKind:
+  case MemRegion::ObjCIvarRegionKind:
+    return state->set<CStringLength>(MR, StrLen);
+
+  case MemRegion::ElementRegionKind:
+    // FIXME: Handle element regions by upper-bounding the parent region's
+    // string length.
+    return state;
+
+  default:
+    // Other regions (mostly non-data) can't have a reliable C string length.
+    // For now, just ignore the change.
+    // FIXME: These are rare but not impossible. We should output some kind of
+    // warning for things like strcpy((char[]){'a', 0}, "b");
+    return state;
+  }
+}
+
 SVal CStringChecker::GetCStringLengthForRegion(CheckerContext &C,
                                                const GRState *&state,
                                                const Expr *Ex,
@@ -517,6 +576,37 @@
   }
 }
 
+const GRState *CStringChecker::InvalidateBuffer(CheckerContext &C,
+                                                const GRState *state,
+                                                const Expr *E, SVal V) {
+  Loc *L = dyn_cast<Loc>(&V);
+  if (!L)
+    return state;
+
+  // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
+  // some assumptions about the value that CFRefCount can't. Even so, it should
+  // probably be refactored.
+  if (loc::MemRegionVal* MR = dyn_cast<loc::MemRegionVal>(L)) {
+    const MemRegion *R = MR->getRegion()->StripCasts();
+
+    // Are we dealing with an ElementRegion?  If so, we should be invalidating
+    // the super-region.
+    if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
+      R = ER->getSuperRegion();
+      // FIXME: What about layers of ElementRegions?
+    }
+
+    // Invalidate this region.
+    unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
+    return state->InvalidateRegion(R, E, Count, NULL);
+  }
+
+  // If we have a non-region value by chance, just remove the binding.
+  // FIXME: is this necessary or correct? This handles the non-Region
+  //  cases.  Is it ever valid to store to these?
+  return state->unbindLoc(*L);
+}
+
 bool CStringChecker::SummarizeRegion(llvm::raw_ostream& os, ASTContext& Ctx,
                                      const MemRegion *MR) {
   const TypedRegion *TR = dyn_cast<TypedRegion>(MR);
@@ -577,11 +667,20 @@
   // If the size can be nonzero, we have to check the other arguments.
   if (StNonZeroSize) {
     state = StNonZeroSize;
-    state = CheckBufferAccess(C, state, Size, Dest, Source);
+    state = CheckBufferAccess(C, state, Size, Dest, Source,
+                              /* FirstIsDst = */ true);
     if (Restricted)
       state = CheckOverlap(C, state, Size, Dest, Source);
-    if (state)
+
+    if (state) {
+      // Invalidate the destination.
+      // FIXME: Even if we can't perfectly model the copy, we should see if we
+      // can use LazyCompoundVals to copy the source values into the destination.
+      // This would probably remove any existing bindings past the end of the
+      // copied region, but that's still an improvement over blank invalidation.
+      state = InvalidateBuffer(C, state, Dest, state->getSVal(Dest));
       C.addTransition(state);
+    }
   }
 }
 
@@ -668,7 +767,7 @@
       if (state) {
         // The return value is the comparison result, which we don't know.
         unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
-        SVal CmpV = ValMgr.getConjuredSymbolVal(NULL, CE, CE->getType(), Count);
+        SVal CmpV = ValMgr.getConjuredSymbolVal(NULL, CE, Count);
         state = state->BindExpr(CE, CmpV);
         C.addTransition(state);
       }
@@ -698,7 +797,7 @@
     if (StrLen.isUnknown()) {
       ValueManager &ValMgr = C.getValueManager();
       unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
-      StrLen = ValMgr.getConjuredSymbolVal(NULL, CE, CE->getType(), Count);
+      StrLen = ValMgr.getConjuredSymbolVal(NULL, CE, Count);
     }
 
     // Bind the return value.
@@ -707,6 +806,90 @@
   }
 }
 
+void CStringChecker::EvalStrcpy(CheckerContext &C, const CallExpr *CE) {
+  // char *strcpy(char *restrict dst, const char *restrict src);
+  EvalStrcpyCommon(C, CE, /* ReturnEnd = */ false);
+}
+
+void CStringChecker::EvalStpcpy(CheckerContext &C, const CallExpr *CE) {
+  // char *stpcpy(char *restrict dst, const char *restrict src);
+  EvalStrcpyCommon(C, CE, /* ReturnEnd = */ true);
+}
+
+void CStringChecker::EvalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
+                                      bool ReturnEnd) {
+  const GRState *state = C.getState();
+
+  // Check that the destination is non-null
+  const Expr *Dst = CE->getArg(0);
+  SVal DstVal = state->getSVal(Dst);
+
+  state = CheckNonNull(C, state, Dst, DstVal);
+  if (!state)
+    return;
+
+  // Check that the source is non-null.
+  const Expr *Src = CE->getArg(1);
+  SVal SrcVal = state->getSVal(Src);
+
+  state = CheckNonNull(C, state, Src, SrcVal);
+  if (!state)
+    return;
+
+  // Get the string length of the source.
+  SVal StrLen = GetCStringLength(C, state, Src, SrcVal);
+
+  // If the source isn't a valid C string, give up.
+  if (StrLen.isUndef())
+    return;
+
+  SVal Result = (ReturnEnd ? UnknownVal() : DstVal);
+
+  // If the destination is a MemRegion, try to check for a buffer overflow and
+  // record the new string length.
+  if (loc::MemRegionVal *DstRegVal = dyn_cast<loc::MemRegionVal>(&DstVal)) {
+    // If the length is known, we can check for an overflow.
+    if (NonLoc *KnownStrLen = dyn_cast<NonLoc>(&StrLen)) {
+      SValuator &SV = C.getSValuator();
+
+      SVal LastElement = SV.EvalBinOpLN(state, BinaryOperator::Add,
+                                        *DstRegVal, *KnownStrLen,
+                                        Dst->getType());
+
+      state = CheckLocation(C, state, Dst, LastElement, /* IsDst = */ true);
+      if (!state)
+        return;
+
+      // If this is a stpcpy-style copy, the last element is the return value.
+      if (ReturnEnd)
+        Result = LastElement;
+    }
+
+    // Invalidate the destination. This must happen before we set the C string
+    // length because invalidation will clear the length.
+    // FIXME: Even if we can't perfectly model the copy, we should see if we
+    // can use LazyCompoundVals to copy the source values into the destination.
+    // This would probably remove any existing bindings past the end of the
+    // string, but that's still an improvement over blank invalidation.
+    state = InvalidateBuffer(C, state, Dst, *DstRegVal);
+
+    // Set the C string length of the destination.
+    state = SetCStringLength(state, DstRegVal->getRegion(), StrLen);
+  }
+
+  // If this is a stpcpy-style copy, but we were unable to check for a buffer
+  // overflow, we still need a result. Conjure a return value.
+  if (ReturnEnd && Result.isUnknown()) {
+    ValueManager &ValMgr = C.getValueManager();
+    unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
+    StrLen = ValMgr.getConjuredSymbolVal(NULL, CE, Count);
+  }
+
+  // Set the return value.
+  state = state->BindExpr(CE, Result);
+  C.addTransition(state);
+}
+
 //===----------------------------------------------------------------------===//
 // The driver method, and other Checker callbacks.
 //===----------------------------------------------------------------------===//
@@ -730,6 +913,8 @@
     .Cases("memcpy", "__memcpy_chk", &CStringChecker::EvalMemcpy)
     .Cases("memcmp", "bcmp", &CStringChecker::EvalMemcmp)
     .Cases("memmove", "__memmove_chk", &CStringChecker::EvalMemmove)
+    .Cases("strcpy", "__strcpy_chk", &CStringChecker::EvalStrcpy)
+    .Cases("stpcpy", "__stpcpy_chk", &CStringChecker::EvalStpcpy)
     .Case("strlen", &CStringChecker::EvalStrlen)
     .Case("bcopy", &CStringChecker::EvalBcopy)
     .Default(NULL);

Modified: cfe/trunk/test/Analysis/bstring.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/bstring.c?rev=111120&r1=111119&r2=111120&view=diff
==============================================================================
--- cfe/trunk/test/Analysis/bstring.c (original)
+++ cfe/trunk/test/Analysis/bstring.c Mon Aug 16 02:51:42 2010
@@ -48,27 +48,30 @@
 
 void memcpy0 () {
   char src[] = {1, 2, 3, 4};
-  char dst[4];
+  char dst[4] = {0};
 
   memcpy(dst, src, 4); // no-warning
 
   if (memcpy(dst, src, 4) != dst) {
     (void)*(char*)0; // no-warning
   }
+
+  if (dst[0] != 0)
+    (void)*(char*)0; // expected-warning{{null}}
 }
 
 void memcpy1 () {
   char src[] = {1, 2, 3, 4};
   char dst[10];
 
-  memcpy(dst, src, 5); // expected-warning{{out-of-bound}}
+  memcpy(dst, src, 5); // expected-warning{{Byte string function accesses out-of-bound array element}}
 }
 
 void memcpy2 () {
   char src[] = {1, 2, 3, 4};
   char dst[1];
 
-  memcpy(dst, src, 4); // expected-warning{{out-of-bound}}
+  memcpy(dst, src, 4); // expected-warning{{Byte string function overflows destination buffer}}
 }
 
 void memcpy3 () {
@@ -82,14 +85,14 @@
   char src[] = {1, 2, 3, 4};
   char dst[10];
 
-  memcpy(dst+2, src+2, 3); // expected-warning{{out-of-bound}}
+  memcpy(dst+2, src+2, 3); // expected-warning{{Byte string function accesses out-of-bound array element}}
 }
 
 void memcpy5() {
   char src[] = {1, 2, 3, 4};
   char dst[3];
 
-  memcpy(dst+2, src+2, 2); // expected-warning{{out-of-bound}}
+  memcpy(dst+2, src+2, 2); // expected-warning{{Byte string function overflows destination buffer}}
 }
 
 void memcpy6() {
@@ -150,13 +153,16 @@
 
 void memmove0 () {
   char src[] = {1, 2, 3, 4};
-  char dst[4];
+  char dst[4] = {0};
 
   memmove(dst, src, 4); // no-warning
 
   if (memmove(dst, src, 4) != dst) {
     (void)*(char*)0; // no-warning
   }
+
+  if (dst[0] != 0)
+    (void)*(char*)0; // expected-warning{{null}}
 }
 
 void memmove1 () {
@@ -170,7 +176,7 @@
   char src[] = {1, 2, 3, 4};
   char dst[1];
 
-  memmove(dst, src, 4); // expected-warning{{out-of-bound}}
+  memmove(dst, src, 4); // expected-warning{{overflow}}
 }
 
 //===----------------------------------------------------------------------===
@@ -263,9 +269,12 @@
 
 void bcopy0 () {
   char src[] = {1, 2, 3, 4};
-  char dst[4];
+  char dst[4] = {0};
 
   bcopy(src, dst, 4); // no-warning
+
+  if (dst[0] != 0)
+    (void)*(char*)0; // expected-warning{{null}}
 }
 
 void bcopy1 () {
@@ -279,5 +288,5 @@
   char src[] = {1, 2, 3, 4};
   char dst[1];
 
-  bcopy(src, dst, 4); // expected-warning{{out-of-bound}}
+  bcopy(src, dst, 4); // expected-warning{{overflow}}
 }

Modified: cfe/trunk/test/Analysis/string.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/string.c?rev=111120&r1=111119&r2=111120&view=diff
==============================================================================
--- cfe/trunk/test/Analysis/string.c (original)
+++ cfe/trunk/test/Analysis/string.c Mon Aug 16 02:51:42 2010
@@ -24,6 +24,7 @@
 # define BUILTIN(f) f
 #endif /* USE_BUILTINS */
 
+#define NULL 0
 typedef typeof(sizeof(int)) size_t;
 
 //===----------------------------------------------------------------------===
@@ -137,3 +138,103 @@
   if (strlen(x) < 5)
     (void)*(char*)0; // no-warning
 }
+
+//===----------------------------------------------------------------------===
+// strcpy()
+//===----------------------------------------------------------------------===
+
+#ifdef VARIANT
+
+#define __strcpy_chk BUILTIN(__strcpy_chk)
+char *__strcpy_chk(char *restrict s1, const char *restrict s2, size_t destlen);
+
+#define strcpy(a,b) __strcpy_chk(a,b,(size_t)-1)
+
+#else /* VARIANT */
+
+#define strcpy BUILTIN(strcpy)
+char *strcpy(char *restrict s1, const char *restrict s2);
+
+#endif /* VARIANT */
+
+
+void strcpy_null_dst(char *x) {
+  strcpy(NULL, x); // expected-warning{{Null pointer argument in call to byte string function}}
+}
+
+void strcpy_null_src(char *x) {
+  strcpy(x, NULL); // expected-warning{{Null pointer argument in call to byte string function}}
+}
+
+void strcpy_fn(char *x) {
+  strcpy(x, (char*)&strcpy_fn); // expected-warning{{Argument to byte string function is the address of the function 'strcpy_fn', which is not a null-terminated string}}
+}
+
+void strcpy_effects(char *x, char *y) {
+  char a = x[0];
+
+  if (strcpy(x, y) != x)
+    (void)*(char*)0; // no-warning
+
+  if (strlen(x) != strlen(y))
+    (void)*(char*)0; // no-warning
+
+  if (a != x[0])
+    (void)*(char*)0; // expected-warning{{null}}
+}
+
+void strcpy_overflow(char *y) {
+  char x[4];
+  if (strlen(y) == 4)
+    strcpy(x, y); // expected-warning{{Byte string function overflows destination buffer}}
+}
+
+void strcpy_no_overflow(char *y) {
+  char x[4];
+  if (strlen(y) == 3)
+    strcpy(x, y); // no-warning
+}
+
+//===----------------------------------------------------------------------===
+// stpcpy()
+//===----------------------------------------------------------------------===
+
+#ifdef VARIANT
+
+#define __stpcpy_chk BUILTIN(__stpcpy_chk)
+char *__stpcpy_chk(char *restrict s1, const char *restrict s2, size_t destlen);
+
+#define stpcpy(a,b) __stpcpy_chk(a,b,(size_t)-1)
+
+#else /* VARIANT */
+
+#define stpcpy BUILTIN(stpcpy)
+char *stpcpy(char *restrict s1, const char *restrict s2);
+
+#endif /* VARIANT */
+
+
+void stpcpy_effect(char *x, char *y) {
+  char a = x[0];
+
+  if (stpcpy(x, y) != &x[strlen(y)])
+    (void)*(char*)0; // no-warning
+
+  if (strlen(x) != strlen(y))
+    (void)*(char*)0; // no-warning
+
+  if (a != x[0])
+    (void)*(char*)0; // expected-warning{{null}}
+}
+
+void stpcpy_overflow(char *y) {
+  char x[4];
+  if (strlen(y) == 4)
+    stpcpy(x, y); // expected-warning{{Byte string function overflows destination buffer}}
+}
+
+void stpcpy_no_overflow(char *y) {
+  char x[4];
+  if (strlen(y) == 3)
+    stpcpy(x, y); // no-warning
+}





More information about the cfe-commits mailing list