[cfe-commits] r148010 - in /cfe/trunk: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp test/Analysis/taint-generic.c

Anna Zaks ganna at apple.com
Wed Jan 11 18:22:35 PST 2012


Author: zaks
Date: Wed Jan 11 20:22:34 2012
New Revision: 148010

URL: http://llvm.org/viewvc/llvm-project?rev=148010&view=rev
Log:
[analyzer] Add taint transfer by strcpy & others (part 1).

To simplify the process:
Refactor taint generation checker to simplify passing the
information on which arguments need to be tainted from pre to post
visit.

Todo: We need to factor out the code that sema is using to identify the
string and memcpy functions and use it here and in the CString checker.

Modified:
    cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
    cfe/trunk/test/Analysis/taint-generic.c

Modified: cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp?rev=148010&r1=148009&r2=148010&view=diff
==============================================================================
--- cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp (original)
+++ cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp Wed Jan 11 20:22:34 2012
@@ -20,6 +20,7 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include <climits>
 
 using namespace clang;
 using namespace ento;
@@ -28,48 +29,42 @@
 class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
                                             check::PreStmt<CallExpr> > {
 public:
-  enum TaintOnPreVisitKind {
-    /// No taint propagates from pre-visit to post-visit.
-    PrevisitNone = 0,
-    /// Based on the pre-visit, the return argument of the call
-    /// should be tainted.
-    PrevisitTaintRet = 1,
-    /// Based on the pre-visit, the call can taint values through it's
-    /// pointer/reference arguments.
-    PrevisitTaintArgs = 2
-  };
+  static const unsigned ReturnValueIndex = UINT_MAX;
 
 private:
   mutable llvm::OwningPtr<BugType> BT;
   void initBugType() const;
 
-  /// Add/propagate taint on a post visit.
-  void taintPost(const CallExpr *CE, CheckerContext &C) const;
-  /// Add/propagate taint on a pre visit.
-  void taintPre(const CallExpr *CE, CheckerContext &C) const;
-
-  /// Catch taint related bugs. Check if tainted data is passed to a system
-  /// call etc.
+  /// \brief Catch taint related bugs. Check if tainted data is passed to a
+  /// system call etc.
   bool checkPre(const CallExpr *CE, CheckerContext &C) const;
 
-  /// Given a pointer argument, get the symbol of the value it contains
+  /// \brief Add taint sources on a pre-visit.
+  void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
+
+  /// \brief Propagate taint generated at pre-visit.
+  bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
+
+  /// \brief Add taint sources on a post visit.
+  void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
+
+  /// \brief Given a pointer argument, get the symbol of the value it contains
   /// (points to).
   SymbolRef getPointedToSymbol(CheckerContext &C,
                                const Expr *Arg,
-                               bool IssueWarning = true) const;
+                               bool IssueWarning = false) const;
 
   /// Functions defining the attack surface.
   typedef const ProgramState *(GenericTaintChecker::*FnCheck)(const CallExpr *,
                                                        CheckerContext &C) const;
   const ProgramState *postScanf(const CallExpr *CE, CheckerContext &C) const;
-  const ProgramState *postFscanf(const CallExpr *CE, CheckerContext &C) const;
   const ProgramState *postRetTaint(const CallExpr *CE, CheckerContext &C) const;
-  const ProgramState *postDefault(const CallExpr *CE, CheckerContext &C) const;
 
   /// Taint the scanned input if the file is tainted.
   const ProgramState *preFscanf(const CallExpr *CE, CheckerContext &C) const;
   /// Taint if any of the arguments are tainted.
   const ProgramState *preAnyArgs(const CallExpr *CE, CheckerContext &C) const;
+  const ProgramState *preStrcpy(const CallExpr *CE, CheckerContext &C) const;
 
   /// Check if the region the expression evaluates to is the standard input,
   /// and thus, is tainted.
@@ -90,20 +85,17 @@
 };
 }
 
-/// Definitions for the checker specific state.
-namespace { struct TaintOnPreVisit {};}
-namespace clang {
-namespace ento {
-  /// A flag which is used to pass information from call pre-visit instruction
-  /// to the call post-visit. The value is an unsigned, which takes on values
-  /// of the TaintOnPreVisitKind enumeration.
-  template<>
-  struct ProgramStateTrait<TaintOnPreVisit> :
-    public ProgramStatePartialTrait<unsigned> {
-    static void *GDMIndex() { return GenericTaintChecker::getTag(); }
-  };
-}
-}
+/// A set which is used to pass information from call pre-visit instruction
+/// to the call post-visit. The values are unsigned integers, which are either
+/// ReturnValueIndex, or indexes of the pointer/reference argument, which
+/// points to data, which should be tainted on return.
+namespace { struct TaintArgsOnPostVisit{}; }
+namespace clang { namespace ento {
+template<> struct ProgramStateTrait<TaintArgsOnPostVisit>
+    :  public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > {
+  static void *GDMIndex() { return GenericTaintChecker::getTag(); }
+};
+}}
 
 inline void GenericTaintChecker::initBugType() const {
   if (!BT)
@@ -117,23 +109,31 @@
     return;
 
   // Add taint second.
-  taintPre(CE, C);
+  addSourcesPre(CE, C);
 }
 
 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
                                         CheckerContext &C) const {
-  taintPost(CE, C);
+  if (propagateFromPre(CE, C))
+    return;
+  addSourcesPost(CE, C);
 }
 
-void GenericTaintChecker::taintPre(const CallExpr *CE,
-                                   CheckerContext &C) const {
+void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
+                                        CheckerContext &C) const {
   // Set the evaluation function by switching on the callee name.
   StringRef Name = C.getCalleeName(CE);
+  if (Name.empty())
+    return;
   FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
-    .Case("fscanf", &GenericTaintChecker::preFscanf)
     .Case("atoi", &GenericTaintChecker::preAnyArgs)
     .Case("atol", &GenericTaintChecker::preAnyArgs)
     .Case("atoll", &GenericTaintChecker::preAnyArgs)
+    .Case("fscanf", &GenericTaintChecker::preFscanf)
+    .Cases("strcpy", "__builtin___strcpy_chk",
+           "__inline_strcpy_chk", &GenericTaintChecker::preStrcpy)
+    .Cases("stpcpy", "__builtin___stpcpy_chk", &GenericTaintChecker::preStrcpy)
+    .Cases("strncpy", "__builtin___strncpy_chk", &GenericTaintChecker::preStrcpy)
     .Default(0);
 
   // Check and evaluate the call.
@@ -146,22 +146,58 @@
   C.addTransition(State);
 }
 
-void GenericTaintChecker::taintPost(const CallExpr *CE,
-                                    CheckerContext &C) const {
+bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
+                                           CheckerContext &C) const {
+  const ProgramState *State = C.getState();
+
+  // Depending on what was tainted at pre-visit, we determined a set of
+  // arguments which should be tainted after the function returns. These are
+  // stored in the state as TaintArgsOnPostVisit set.
+  llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>();
+  for (llvm::ImmutableSet<unsigned>::iterator
+         I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
+    unsigned ArgNum  = *I;
+
+    // Special handling for the tainted return value.
+    if (ArgNum == ReturnValueIndex) {
+      State = State->addTaint(CE, C.getLocationContext());
+      continue;
+    }
+
+    // The arguments are pointer arguments. The data they are pointing at is
+    // tainted after the call.
+    const Expr* Arg = CE->getArg(ArgNum);
+    SymbolRef Sym = getPointedToSymbol(C, Arg, true);
+    if (Sym)
+      State = State->addTaint(Sym);
+  }
+
+  // Clear up the taint info from the state.
+  State = State->remove<TaintArgsOnPostVisit>();
+
+  if (State != C.getState()) {
+    C.addTransition(State);
+    return true;
+  }
+  return false;
+}
+
+void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
+                                         CheckerContext &C) const {
   // Define the attack surface.
   // Set the evaluation function by switching on the callee name.
   StringRef Name = C.getCalleeName(CE);
+  if (Name.empty())
+    return;
   FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
     .Case("scanf", &GenericTaintChecker::postScanf)
-    .Case("fscanf", &GenericTaintChecker::postFscanf)
-    .Case("sscanf", &GenericTaintChecker::postFscanf)
     // TODO: Add support for vfscanf & family.
     .Case("getchar", &GenericTaintChecker::postRetTaint)
     .Case("getenv", &GenericTaintChecker::postRetTaint)
     .Case("fopen", &GenericTaintChecker::postRetTaint)
     .Case("fdopen", &GenericTaintChecker::postRetTaint)
     .Case("freopen", &GenericTaintChecker::postRetTaint)
-    .Default(&GenericTaintChecker::postDefault);
+    .Default(0);
 
   // If the callee isn't defined, it is not of security concern.
   // Check and evaluate the call.
@@ -171,8 +207,6 @@
   if (!State)
     return;
 
-  assert(State->get<TaintOnPreVisit>() == PrevisitNone &&
-         "State has to be cleared.");
   C.addTransition(State);
 }
 
@@ -213,6 +247,8 @@
   return Val.getAsSymbol();
 }
 
+// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
+// and arg 1 should get taint.
 const ProgramState *GenericTaintChecker::preFscanf(const CallExpr *CE,
                                                    CheckerContext &C) const {
   assert(CE->getNumArgs() >= 2);
@@ -220,8 +256,13 @@
 
   // Check is the file descriptor is tainted.
   if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
-      isStdin(CE->getArg(0), C))
-    return State->set<TaintOnPreVisit>(PrevisitTaintArgs);
+      isStdin(CE->getArg(0), C)) {
+    // All arguments except for the first two should get taint.
+    for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
+        State = State->add<TaintArgsOnPostVisit>(i);
+    return State;
+  }
+
   return 0;
 }
 
@@ -233,22 +274,19 @@
     const Expr *Arg = CE->getArg(i);
     if (State->isTainted(Arg, C.getLocationContext()) ||
         State->isTainted(getPointedToSymbol(C, Arg)))
-      return State = State->set<TaintOnPreVisit>(PrevisitTaintRet);
+      return State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
   }
   return 0;
 }
 
-const ProgramState *GenericTaintChecker::postDefault(const CallExpr *CE,
-                                                     CheckerContext &C) const {
+const ProgramState * GenericTaintChecker::preStrcpy(const CallExpr *CE,
+                                                    CheckerContext &C) const {
+  assert(CE->getNumArgs() >= 2);
+  const Expr *FromArg = CE->getArg(1);
   const ProgramState *State = C.getState();
-
-  // Check if we know that the result needs to be tainted based on the
-  // pre-visit analysis.
-  if (State->get<TaintOnPreVisit>() == PrevisitTaintRet) {
-    State = State->addTaint(CE, C.getLocationContext());
-    return State->set<TaintOnPreVisit>(PrevisitNone);
-  }
-
+  if (State->isTainted(FromArg, C.getLocationContext()) ||
+      State->isTainted(getPointedToSymbol(C, FromArg)))
+    return State = State->add<TaintArgsOnPostVisit>(0);
   return 0;
 }
 
@@ -262,34 +300,7 @@
     // The arguments are pointer arguments. The data they are pointing at is
     // tainted after the call.
     const Expr* Arg = CE->getArg(i);
-        SymbolRef Sym = getPointedToSymbol(C, Arg);
-    if (Sym)
-      State = State->addTaint(Sym);
-  }
-  return State;
-}
-
-/// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
-/// and arg 1 should get taint.
-const ProgramState *GenericTaintChecker::postFscanf(const CallExpr *CE,
-                                                    CheckerContext &C) const {
-  const ProgramState *State = C.getState();
-  assert(CE->getNumArgs() >= 2);
-
-  // Fscanf is only tainted if the input file is tainted at pre visit, so
-  // check for that first.
-  if (State->get<TaintOnPreVisit>() == PrevisitNone)
-    return 0;
-
-  // Reset the taint state.
-  State = State->set<TaintOnPreVisit>(PrevisitNone);
-
-  // All arguments except for the first two should get taint.
-  for (unsigned int i = 2; i < CE->getNumArgs(); ++i) {
-    // The arguments are pointer arguments. The data they are pointing at is
-    // tainted after the call.
-    const Expr* Arg = CE->getArg(i);
-    SymbolRef Sym = getPointedToSymbol(C, Arg);
+        SymbolRef Sym = getPointedToSymbol(C, Arg, true);
     if (Sym)
       State = State->addTaint(Sym);
   }
@@ -373,7 +384,7 @@
   // If either the format string content or the pointer itself are tainted, warn.
   const ProgramState *State = C.getState();
   const Expr *Arg = CE->getArg(ArgNum);
-  if (State->isTainted(getPointedToSymbol(C, Arg, false)) ||
+  if (State->isTainted(getPointedToSymbol(C, Arg)) ||
       State->isTainted(Arg, C.getLocationContext()))
     if (ExplodedNode *N = C.addTransition()) {
       initBugType();

Modified: cfe/trunk/test/Analysis/taint-generic.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/taint-generic.c?rev=148010&r1=148009&r2=148010&view=diff
==============================================================================
--- cfe/trunk/test/Analysis/taint-generic.c (original)
+++ cfe/trunk/test/Analysis/taint-generic.c Wed Jan 11 20:22:34 2012
@@ -3,6 +3,26 @@
 int scanf(const char *restrict format, ...);
 int getchar(void);
 
+typedef struct _FILE FILE;
+extern FILE *stdin;
+int fscanf(FILE *restrict stream, const char *restrict format, ...);
+int sprintf(char *str, const char *format, ...);
+void setproctitle(const char *fmt, ...);
+typedef __typeof(sizeof(int)) size_t;
+
+// Define string functions. Use builtin for some of them. They all default to
+// the processing in the taint checker.
+#define strcpy(dest, src) \
+  ((__builtin_object_size(dest, 0) != -1ULL) \
+   ? __builtin___strcpy_chk (dest, src, __builtin_object_size(dest, 1)) \
+   : __inline_strcpy_chk(dest, src))
+
+static char *__inline_strcpy_chk (char *dest, const char *src) {
+  return __builtin___strcpy_chk(dest, src, __builtin_object_size(dest, 1));
+}
+char *stpcpy(char *restrict s1, const char *restrict s2);
+char *strncpy( char * destination, const char * source, size_t num );
+
 #define BUFSIZE 10
 
 int Buffer[BUFSIZE];
@@ -47,16 +67,23 @@
   Buffer[m] = 1;  //expected-warning {{Out of bound memory access }}
 }
 
-typedef struct _FILE FILE;
-extern FILE *stdin;
-int fscanf(FILE *restrict stream, const char *restrict format, ...);
-int sprintf(char *str, const char *format, ...);
-void setproctitle(const char *fmt, ...);
-
-void testUncontrolledFormatString() {
+void testUncontrolledFormatString(char **p) {
   char s[80];
   fscanf(stdin, "%s", s);
   char buf[128];
   sprintf(buf,s); // expected-warning {{Uncontrolled Format String}}
   setproctitle(s, 3); // expected-warning {{Uncontrolled Format String}}
+
+  // Test taint propagation through strcpy and family.
+  char scpy[80];
+  strcpy(scpy, s);
+  sprintf(buf,scpy); // expected-warning {{Uncontrolled Format String}}
+
+  char spcpy[80];
+  stpcpy(spcpy, s);
+  setproctitle(spcpy, 3); // expected-warning {{Uncontrolled Format String}}
+
+  char sncpy[80];
+  strncpy(sncpy, s, 20);
+  setproctitle(sncpy, 3); // expected-warning {{Uncontrolled Format String}}
 }





More information about the cfe-commits mailing list