[cfe-commits] r148370 - in /cfe/trunk: include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp lib/StaticAnalyzer/Core/CheckerContext.cpp test/Analysis/taint-generic.c

Anna Zaks ganna at apple.com
Tue Jan 17 18:45:07 PST 2012


Author: zaks
Date: Tue Jan 17 20:45:07 2012
New Revision: 148370

URL: http://llvm.org/viewvc/llvm-project?rev=148370&view=rev
Log:
[analyzer] Taint: add taint propagation rules for string and memory copy
functions.

Modified:
    cfe/trunk/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h
    cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
    cfe/trunk/lib/StaticAnalyzer/Core/CheckerContext.cpp
    cfe/trunk/test/Analysis/taint-generic.c

Modified: cfe/trunk/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h?rev=148370&r1=148369&r2=148370&view=diff
==============================================================================
--- cfe/trunk/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h (original)
+++ cfe/trunk/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h Tue Jan 17 20:45:07 2012
@@ -149,7 +149,17 @@
   const FunctionDecl *getCalleeDecl(const CallExpr *CE) const;
 
   /// \brief Get the name of the called function (path-sensitive).
-  StringRef getCalleeName(const CallExpr *CE) const;
+  StringRef getCalleeName(const FunctionDecl *FunDecl) const;
+
+  /// \brief Get the name of the called function (path-sensitive).
+  StringRef getCalleeName(const CallExpr *CE) const {
+    const FunctionDecl *FunDecl = getCalleeDecl(CE);
+    return getCalleeName(FunDecl);
+  }
+
+  /// Given a function declaration and a name checks if this is a C lib
+  /// function with the given name.
+  bool isCLibraryFunction(const FunctionDecl *FD, StringRef Name);
 
 private:
   ExplodedNode *addTransitionImpl(const ProgramState *State,

Modified: cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp?rev=148370&r1=148369&r2=148370&view=diff
==============================================================================
--- cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp (original)
+++ cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp Tue Jan 17 20:45:07 2012
@@ -20,6 +20,7 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/Basic/Builtins.h"
 #include <climits>
 
 using namespace clang;
@@ -41,7 +42,10 @@
   static const unsigned InvalidArgIndex = UINT_MAX - 1;
 
   mutable llvm::OwningPtr<BugType> BT;
-  void initBugType() const;
+  inline void initBugType() const {
+    if (!BT)
+      BT.reset(new BugType("Taint Analysis", "General"));
+  }
 
   /// \brief Catch taint related bugs. Check if tainted data is passed to a
   /// system call etc.
@@ -78,9 +82,6 @@
 
   /// Taint the scanned input if the file is tainted.
   const ProgramState *preFscanf(const CallExpr *CE, CheckerContext &C) const;
-  /// Taint if any of the arguments are tainted.
-  const ProgramState *preAnyArgs(const CallExpr *CE, CheckerContext &C) const;
-  const ProgramState *preStrcpy(const CallExpr *CE, CheckerContext &C) const;
 
   /// Check if the region the expression evaluates to is the standard input,
   /// and thus, is tainted.
@@ -119,18 +120,42 @@
     ArgVector SrcArgs;
     /// List of arguments which should be tainted on function return.
     ArgVector DstArgs;
+    // TODO: Check if using other data structures would be more optimal.
 
     TaintPropagationRule() {}
 
-    TaintPropagationRule(unsigned SArg, unsigned DArg) {
+    TaintPropagationRule(unsigned SArg,
+                         unsigned DArg, bool TaintRet = false) {
       SrcArgs.push_back(SArg);
       DstArgs.push_back(DArg);
+      if (TaintRet)
+        DstArgs.push_back(ReturnValueIndex);
+    }
+
+    TaintPropagationRule(unsigned SArg1, unsigned SArg2,
+                         unsigned DArg, bool TaintRet = false) {
+      SrcArgs.push_back(SArg1);
+      SrcArgs.push_back(SArg2);
+      DstArgs.push_back(DArg);
+      if (TaintRet)
+        DstArgs.push_back(ReturnValueIndex);
     }
 
+    /// Get the propagation rule for a given function.
+    static TaintPropagationRule
+      getTaintPropagationRule(const FunctionDecl *FDecl,
+                              StringRef Name,
+                              CheckerContext &C);
+
     inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
     inline void addDstArg(unsigned A)  { DstArgs.push_back(A); }
 
-    inline bool isNull() { return SrcArgs.empty(); }
+    inline bool isNull() const { return SrcArgs.empty(); }
+
+    inline bool isDestinationArgument(unsigned ArgNum) const {
+      return (std::find(DstArgs.begin(),
+                        DstArgs.end(), ArgNum) != DstArgs.end());
+    }
   };   
   
   /// \brief Pre-process a function which propagates taint according to the
@@ -141,14 +166,16 @@
 
                               
 };
-// TODO: We probably could use TableGen here.
+
+const unsigned GenericTaintChecker::ReturnValueIndex;
+const unsigned GenericTaintChecker::InvalidArgIndex;
+
 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
   "Tainted format string (CWE-134: Uncontrolled Format String)";
 
 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
   "Tainted data passed to a system call "
   "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
-
 }
 
 /// A set which is used to pass information from call pre-visit instruction
@@ -163,9 +190,67 @@
 };
 }}
 
-inline void GenericTaintChecker::initBugType() const {
-  if (!BT)
-    BT.reset(new BugType("Taint Analysis", "General"));
+GenericTaintChecker::TaintPropagationRule
+GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
+                                                     const FunctionDecl *FDecl,
+                                                     StringRef Name,
+                                                     CheckerContext &C) {
+  // Check for exact name match for functions without builtin substitutes.
+  TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
+    .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
+    .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
+    .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
+    .Default(TaintPropagationRule());
+
+  if (!Rule.isNull())
+    return Rule;
+
+  // Check if it's one of the memory setting/copying functions.
+  // This check is specialized but faster then calling isCLibraryFunction.
+  unsigned BId = 0;
+  if ( (BId = FDecl->getMemoryFunctionKind()) )
+    switch(BId) {
+    case Builtin::BImemcpy:
+    case Builtin::BImemmove:
+    case Builtin::BIstrncpy:
+    case Builtin::BIstrncat:
+      return TaintPropagationRule(1, 2, 0, true);
+      break;
+    case Builtin::BIstrlcpy:
+    case Builtin::BIstrlcat:
+      return TaintPropagationRule(1, 2, 0, false);
+      break;
+    case Builtin::BIstrndup:
+      return TaintPropagationRule(0, 1, ReturnValueIndex);
+      break;
+
+    default:
+      break;
+    };
+
+  // Process all other functions which could be defined as builtins.
+  if (Rule.isNull()) {
+    if (C.isCLibraryFunction(FDecl, "snprintf") ||
+        C.isCLibraryFunction(FDecl, "sprintf"))
+      return TaintPropagationRule(InvalidArgIndex, 0, true);
+    else if (C.isCLibraryFunction(FDecl, "strcpy") ||
+             C.isCLibraryFunction(FDecl, "stpcpy") ||
+             C.isCLibraryFunction(FDecl, "strcat"))
+      return TaintPropagationRule(1, 0, true);
+    else if (C.isCLibraryFunction(FDecl, "bcopy"))
+      return TaintPropagationRule(0, 2, 1, false);
+    else if (C.isCLibraryFunction(FDecl, "strdup") ||
+             C.isCLibraryFunction(FDecl, "strdupa"))
+      return TaintPropagationRule(0, ReturnValueIndex);
+    else if (C.isCLibraryFunction(FDecl, "strndupa"))
+      return TaintPropagationRule(0, 1, ReturnValueIndex);
+  }
+
+  // Skipping the following functions, since they might be used for cleansing
+  // or smart memory copy:
+  // - memccpy - copying untill hitting a special character.
+
+  return TaintPropagationRule();
 }
 
 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
@@ -187,41 +272,34 @@
 
 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
                                         CheckerContext &C) const {
-  // Set the evaluation function by switching on the callee name.
-  StringRef Name = C.getCalleeName(CE);
+  const ProgramState *State = 0;
+  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
+  StringRef Name = C.getCalleeName(FDecl);
   if (Name.empty())
     return;
 
-  const ProgramState *State = 0;
-
-  TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
-    .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
-    .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
-    .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
-    .Default(TaintPropagationRule());
-
+  // First, try generating a propagation rule for this function.
+  TaintPropagationRule Rule =
+    TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
   if (!Rule.isNull()) {
     State = prePropagateTaint(CE, C, Rule);
     if (!State)
       return;
     C.addTransition(State);
+    return;
   }
 
+  // Otherwise, check if we have custom pre-processing implemented.
   FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
     .Case("fscanf", &GenericTaintChecker::preFscanf)
-    .Cases("strcpy", "__builtin___strcpy_chk",
-           "__inline_strcpy_chk", &GenericTaintChecker::preStrcpy)
-    .Cases("stpcpy", "__builtin___stpcpy_chk", &GenericTaintChecker::preStrcpy)
-    .Cases("strncpy", "__builtin___strncpy_chk", &GenericTaintChecker::preStrcpy)
     .Default(0);
-
   // Check and evaluate the call.
   if (evalFunction)
     State = (this->*evalFunction)(CE, C);
   if (!State)
     return;
-
   C.addTransition(State);
+
 }
 
 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
@@ -348,10 +426,14 @@
     unsigned ArgNum = *I;
 
     if (ArgNum == InvalidArgIndex) {
-      // Check if any of the arguments is tainted.
-      for (unsigned int i = 0; i < CE->getNumArgs(); ++i)
+      // Check if any of the arguments is tainted, but skip the
+      // destination arguments.
+      for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
+        if (PR.isDestinationArgument(i))
+          continue;
         if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
           break;
+      }
       break;
     }
 
@@ -419,30 +501,6 @@
   return 0;
 }
 
-// If any arguments are tainted, mark the return value as tainted on post-visit.
-const ProgramState * GenericTaintChecker::preAnyArgs(const CallExpr *CE,
-                                                     CheckerContext &C) const {
-  for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
-    const ProgramState *State = C.getState();
-    const Expr *Arg = CE->getArg(i);
-    if (State->isTainted(Arg, C.getLocationContext()) ||
-        State->isTainted(getPointedToSymbol(C, Arg)))
-      return State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
-  }
-  return 0;
-}
-
-const ProgramState * GenericTaintChecker::preStrcpy(const CallExpr *CE,
-                                                    CheckerContext &C) const {
-  assert(CE->getNumArgs() >= 2);
-  const Expr *FromArg = CE->getArg(1);
-  const ProgramState *State = C.getState();
-  if (State->isTainted(FromArg, C.getLocationContext()) ||
-      State->isTainted(getPointedToSymbol(C, FromArg)))
-    return State = State->add<TaintArgsOnPostVisit>(0);
-  return 0;
-}
-
 const ProgramState *GenericTaintChecker::postScanf(const CallExpr *CE,
                                                    CheckerContext &C) const {
   const ProgramState *State = C.getState();

Modified: cfe/trunk/lib/StaticAnalyzer/Core/CheckerContext.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/StaticAnalyzer/Core/CheckerContext.cpp?rev=148370&r1=148369&r2=148370&view=diff
==============================================================================
--- cfe/trunk/lib/StaticAnalyzer/Core/CheckerContext.cpp (original)
+++ cfe/trunk/lib/StaticAnalyzer/Core/CheckerContext.cpp Tue Jan 17 20:45:07 2012
@@ -13,6 +13,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/Basic/Builtins.h"
+
 using namespace clang;
 using namespace ento;
 
@@ -23,12 +25,31 @@
   return L.getAsFunctionDecl();
 }
 
-StringRef CheckerContext::getCalleeName(const CallExpr *CE) const {
-  const FunctionDecl *funDecl = getCalleeDecl(CE);
-  if (!funDecl)
+StringRef CheckerContext::getCalleeName(const FunctionDecl *FunDecl) const {
+  if (!FunDecl)
     return StringRef();
-  IdentifierInfo *funI = funDecl->getIdentifier();
+  IdentifierInfo *funI = FunDecl->getIdentifier();
   if (!funI)
     return StringRef();
   return funI->getName();
 }
+
+
+bool CheckerContext::isCLibraryFunction(const FunctionDecl *FD,
+                                        StringRef Name){
+  // To avoid false positives (Ex: finding user defined functions with
+  // similar names), only perform fuzzy name matching when it's a builtin.
+  // Using a string compare is slow, we might want to switch on BuiltinID here.
+  unsigned BId = FD->getBuiltinID();
+  if (BId != 0) {
+    ASTContext &Context = getASTContext();
+    StringRef BName = Context.BuiltinInfo.GetName(BId);
+    if (StringRef(BName).find(Name) != StringRef::npos)
+      return true;
+  }
+
+  if (FD->isExternC() && FD->getIdentifier()->getName().equals(Name))
+    return true;
+
+  return false;
+}

Modified: cfe/trunk/test/Analysis/taint-generic.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/taint-generic.c?rev=148370&r1=148369&r2=148370&view=diff
==============================================================================
--- cfe/trunk/test/Analysis/taint-generic.c (original)
+++ cfe/trunk/test/Analysis/taint-generic.c Tue Jan 17 20:45:07 2012
@@ -22,6 +22,7 @@
 }
 char *stpcpy(char *restrict s1, const char *restrict s2);
 char *strncpy( char * destination, const char * source, size_t num );
+char *strndup(const char *s, size_t n);
 
 #define BUFSIZE 10
 
@@ -86,9 +87,18 @@
   stpcpy(spcpy, s);
   setproctitle(spcpy, 3); // expected-warning {{Uncontrolled Format String}}
 
+  char *spcpyret;
+  spcpyret = stpcpy(spcpy, s);
+  setproctitle(spcpyret, 3); // expected-warning {{Uncontrolled Format String}}
+
   char sncpy[80];
   strncpy(sncpy, s, 20);
   setproctitle(sncpy, 3); // expected-warning {{Uncontrolled Format String}}
+
+  char *dup;
+  dup = strndup(s, 20);
+  setproctitle(dup, 3); // expected-warning {{Uncontrolled Format String}}
+
 }
 
 int system(const char *command);
@@ -97,4 +107,24 @@
   char addr[128];
   scanf("%s", addr);
   system(addr); // expected-warning {{Tainted data passed to a system call}}
+
+  // Test that spintf transfers taint.
+  sprintf(buffer, "/bin/mail %s < /tmp/email", addr);
+  system(buffer); // expected-warning {{Tainted data passed to a system call}}
+}
+void testTaintSystemCall2() {
+  // Test that snpintf transfers taint.
+  char buffern[156];
+  char addr[128];
+  scanf("%s", addr);
+  __builtin_snprintf(buffern, 10, "/bin/mail %s < /tmp/email", addr);
+  system(buffern); // expected-warning {{Tainted data passed to a system call}}
+}
+void testTaintSystemCall3() {
+  char buffern2[156];
+  int numt;
+  char addr[128];
+  scanf("%s %d", addr, &numt);
+  __builtin_snprintf(buffern2, numt, "/bin/mail %s < /tmp/email", "abcd");
+  system(buffern2); // expected-warning {{Tainted data passed to a system call}}
 }





More information about the cfe-commits mailing list