[cfe-commits] r148370 - in /cfe/trunk: include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp lib/StaticAnalyzer/Core/CheckerContext.cpp test/Analysis/taint-generic.c
Anna Zaks
ganna at apple.com
Tue Jan 17 18:45:07 PST 2012
Author: zaks
Date: Tue Jan 17 20:45:07 2012
New Revision: 148370
URL: http://llvm.org/viewvc/llvm-project?rev=148370&view=rev
Log:
[analyzer] Taint: add taint propagation rules for string and memory copy
functions.
Modified:
cfe/trunk/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h
cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
cfe/trunk/lib/StaticAnalyzer/Core/CheckerContext.cpp
cfe/trunk/test/Analysis/taint-generic.c
Modified: cfe/trunk/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h?rev=148370&r1=148369&r2=148370&view=diff
==============================================================================
--- cfe/trunk/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h (original)
+++ cfe/trunk/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h Tue Jan 17 20:45:07 2012
@@ -149,7 +149,17 @@
const FunctionDecl *getCalleeDecl(const CallExpr *CE) const;
/// \brief Get the name of the called function (path-sensitive).
- StringRef getCalleeName(const CallExpr *CE) const;
+ StringRef getCalleeName(const FunctionDecl *FunDecl) const;
+
+ /// \brief Get the name of the called function (path-sensitive).
+ StringRef getCalleeName(const CallExpr *CE) const {
+ const FunctionDecl *FunDecl = getCalleeDecl(CE);
+ return getCalleeName(FunDecl);
+ }
+
+ /// Given a function declaration and a name checks if this is a C lib
+ /// function with the given name.
+ bool isCLibraryFunction(const FunctionDecl *FD, StringRef Name);
private:
ExplodedNode *addTransitionImpl(const ProgramState *State,
Modified: cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp?rev=148370&r1=148369&r2=148370&view=diff
==============================================================================
--- cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp (original)
+++ cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp Tue Jan 17 20:45:07 2012
@@ -20,6 +20,7 @@
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include "clang/Basic/Builtins.h"
#include <climits>
using namespace clang;
@@ -41,7 +42,10 @@
static const unsigned InvalidArgIndex = UINT_MAX - 1;
mutable llvm::OwningPtr<BugType> BT;
- void initBugType() const;
+ inline void initBugType() const {
+ if (!BT)
+ BT.reset(new BugType("Taint Analysis", "General"));
+ }
/// \brief Catch taint related bugs. Check if tainted data is passed to a
/// system call etc.
@@ -78,9 +82,6 @@
/// Taint the scanned input if the file is tainted.
const ProgramState *preFscanf(const CallExpr *CE, CheckerContext &C) const;
- /// Taint if any of the arguments are tainted.
- const ProgramState *preAnyArgs(const CallExpr *CE, CheckerContext &C) const;
- const ProgramState *preStrcpy(const CallExpr *CE, CheckerContext &C) const;
/// Check if the region the expression evaluates to is the standard input,
/// and thus, is tainted.
@@ -119,18 +120,42 @@
ArgVector SrcArgs;
/// List of arguments which should be tainted on function return.
ArgVector DstArgs;
+ // TODO: Check if using other data structures would be more optimal.
TaintPropagationRule() {}
- TaintPropagationRule(unsigned SArg, unsigned DArg) {
+ TaintPropagationRule(unsigned SArg,
+ unsigned DArg, bool TaintRet = false) {
SrcArgs.push_back(SArg);
DstArgs.push_back(DArg);
+ if (TaintRet)
+ DstArgs.push_back(ReturnValueIndex);
+ }
+
+ TaintPropagationRule(unsigned SArg1, unsigned SArg2,
+ unsigned DArg, bool TaintRet = false) {
+ SrcArgs.push_back(SArg1);
+ SrcArgs.push_back(SArg2);
+ DstArgs.push_back(DArg);
+ if (TaintRet)
+ DstArgs.push_back(ReturnValueIndex);
}
+ /// Get the propagation rule for a given function.
+ static TaintPropagationRule
+ getTaintPropagationRule(const FunctionDecl *FDecl,
+ StringRef Name,
+ CheckerContext &C);
+
inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
inline void addDstArg(unsigned A) { DstArgs.push_back(A); }
- inline bool isNull() { return SrcArgs.empty(); }
+ inline bool isNull() const { return SrcArgs.empty(); }
+
+ inline bool isDestinationArgument(unsigned ArgNum) const {
+ return (std::find(DstArgs.begin(),
+ DstArgs.end(), ArgNum) != DstArgs.end());
+ }
};
/// \brief Pre-process a function which propagates taint according to the
@@ -141,14 +166,16 @@
};
-// TODO: We probably could use TableGen here.
+
+const unsigned GenericTaintChecker::ReturnValueIndex;
+const unsigned GenericTaintChecker::InvalidArgIndex;
+
const char GenericTaintChecker::MsgUncontrolledFormatString[] =
"Tainted format string (CWE-134: Uncontrolled Format String)";
const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
"Tainted data passed to a system call "
"(CERT/STR02-C. Sanitize data passed to complex subsystems)";
-
}
/// A set which is used to pass information from call pre-visit instruction
@@ -163,9 +190,67 @@
};
}}
-inline void GenericTaintChecker::initBugType() const {
- if (!BT)
- BT.reset(new BugType("Taint Analysis", "General"));
+GenericTaintChecker::TaintPropagationRule
+GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
+ const FunctionDecl *FDecl,
+ StringRef Name,
+ CheckerContext &C) {
+ // Check for exact name match for functions without builtin substitutes.
+ TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
+ .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
+ .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
+ .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
+ .Default(TaintPropagationRule());
+
+ if (!Rule.isNull())
+ return Rule;
+
+ // Check if it's one of the memory setting/copying functions.
+ // This check is specialized but faster then calling isCLibraryFunction.
+ unsigned BId = 0;
+ if ( (BId = FDecl->getMemoryFunctionKind()) )
+ switch(BId) {
+ case Builtin::BImemcpy:
+ case Builtin::BImemmove:
+ case Builtin::BIstrncpy:
+ case Builtin::BIstrncat:
+ return TaintPropagationRule(1, 2, 0, true);
+ break;
+ case Builtin::BIstrlcpy:
+ case Builtin::BIstrlcat:
+ return TaintPropagationRule(1, 2, 0, false);
+ break;
+ case Builtin::BIstrndup:
+ return TaintPropagationRule(0, 1, ReturnValueIndex);
+ break;
+
+ default:
+ break;
+ };
+
+ // Process all other functions which could be defined as builtins.
+ if (Rule.isNull()) {
+ if (C.isCLibraryFunction(FDecl, "snprintf") ||
+ C.isCLibraryFunction(FDecl, "sprintf"))
+ return TaintPropagationRule(InvalidArgIndex, 0, true);
+ else if (C.isCLibraryFunction(FDecl, "strcpy") ||
+ C.isCLibraryFunction(FDecl, "stpcpy") ||
+ C.isCLibraryFunction(FDecl, "strcat"))
+ return TaintPropagationRule(1, 0, true);
+ else if (C.isCLibraryFunction(FDecl, "bcopy"))
+ return TaintPropagationRule(0, 2, 1, false);
+ else if (C.isCLibraryFunction(FDecl, "strdup") ||
+ C.isCLibraryFunction(FDecl, "strdupa"))
+ return TaintPropagationRule(0, ReturnValueIndex);
+ else if (C.isCLibraryFunction(FDecl, "strndupa"))
+ return TaintPropagationRule(0, 1, ReturnValueIndex);
+ }
+
+ // Skipping the following functions, since they might be used for cleansing
+ // or smart memory copy:
+ // - memccpy - copying untill hitting a special character.
+
+ return TaintPropagationRule();
}
void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
@@ -187,41 +272,34 @@
void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
CheckerContext &C) const {
- // Set the evaluation function by switching on the callee name.
- StringRef Name = C.getCalleeName(CE);
+ const ProgramState *State = 0;
+ const FunctionDecl *FDecl = C.getCalleeDecl(CE);
+ StringRef Name = C.getCalleeName(FDecl);
if (Name.empty())
return;
- const ProgramState *State = 0;
-
- TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
- .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
- .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
- .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
- .Default(TaintPropagationRule());
-
+ // First, try generating a propagation rule for this function.
+ TaintPropagationRule Rule =
+ TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
if (!Rule.isNull()) {
State = prePropagateTaint(CE, C, Rule);
if (!State)
return;
C.addTransition(State);
+ return;
}
+ // Otherwise, check if we have custom pre-processing implemented.
FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
.Case("fscanf", &GenericTaintChecker::preFscanf)
- .Cases("strcpy", "__builtin___strcpy_chk",
- "__inline_strcpy_chk", &GenericTaintChecker::preStrcpy)
- .Cases("stpcpy", "__builtin___stpcpy_chk", &GenericTaintChecker::preStrcpy)
- .Cases("strncpy", "__builtin___strncpy_chk", &GenericTaintChecker::preStrcpy)
.Default(0);
-
// Check and evaluate the call.
if (evalFunction)
State = (this->*evalFunction)(CE, C);
if (!State)
return;
-
C.addTransition(State);
+
}
bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
@@ -348,10 +426,14 @@
unsigned ArgNum = *I;
if (ArgNum == InvalidArgIndex) {
- // Check if any of the arguments is tainted.
- for (unsigned int i = 0; i < CE->getNumArgs(); ++i)
+ // Check if any of the arguments is tainted, but skip the
+ // destination arguments.
+ for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
+ if (PR.isDestinationArgument(i))
+ continue;
if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
break;
+ }
break;
}
@@ -419,30 +501,6 @@
return 0;
}
-// If any arguments are tainted, mark the return value as tainted on post-visit.
-const ProgramState * GenericTaintChecker::preAnyArgs(const CallExpr *CE,
- CheckerContext &C) const {
- for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
- const ProgramState *State = C.getState();
- const Expr *Arg = CE->getArg(i);
- if (State->isTainted(Arg, C.getLocationContext()) ||
- State->isTainted(getPointedToSymbol(C, Arg)))
- return State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
- }
- return 0;
-}
-
-const ProgramState * GenericTaintChecker::preStrcpy(const CallExpr *CE,
- CheckerContext &C) const {
- assert(CE->getNumArgs() >= 2);
- const Expr *FromArg = CE->getArg(1);
- const ProgramState *State = C.getState();
- if (State->isTainted(FromArg, C.getLocationContext()) ||
- State->isTainted(getPointedToSymbol(C, FromArg)))
- return State = State->add<TaintArgsOnPostVisit>(0);
- return 0;
-}
-
const ProgramState *GenericTaintChecker::postScanf(const CallExpr *CE,
CheckerContext &C) const {
const ProgramState *State = C.getState();
Modified: cfe/trunk/lib/StaticAnalyzer/Core/CheckerContext.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/StaticAnalyzer/Core/CheckerContext.cpp?rev=148370&r1=148369&r2=148370&view=diff
==============================================================================
--- cfe/trunk/lib/StaticAnalyzer/Core/CheckerContext.cpp (original)
+++ cfe/trunk/lib/StaticAnalyzer/Core/CheckerContext.cpp Tue Jan 17 20:45:07 2012
@@ -13,6 +13,8 @@
//===----------------------------------------------------------------------===//
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/Basic/Builtins.h"
+
using namespace clang;
using namespace ento;
@@ -23,12 +25,31 @@
return L.getAsFunctionDecl();
}
-StringRef CheckerContext::getCalleeName(const CallExpr *CE) const {
- const FunctionDecl *funDecl = getCalleeDecl(CE);
- if (!funDecl)
+StringRef CheckerContext::getCalleeName(const FunctionDecl *FunDecl) const {
+ if (!FunDecl)
return StringRef();
- IdentifierInfo *funI = funDecl->getIdentifier();
+ IdentifierInfo *funI = FunDecl->getIdentifier();
if (!funI)
return StringRef();
return funI->getName();
}
+
+
+bool CheckerContext::isCLibraryFunction(const FunctionDecl *FD,
+ StringRef Name){
+ // To avoid false positives (Ex: finding user defined functions with
+ // similar names), only perform fuzzy name matching when it's a builtin.
+ // Using a string compare is slow, we might want to switch on BuiltinID here.
+ unsigned BId = FD->getBuiltinID();
+ if (BId != 0) {
+ ASTContext &Context = getASTContext();
+ StringRef BName = Context.BuiltinInfo.GetName(BId);
+ if (StringRef(BName).find(Name) != StringRef::npos)
+ return true;
+ }
+
+ if (FD->isExternC() && FD->getIdentifier()->getName().equals(Name))
+ return true;
+
+ return false;
+}
Modified: cfe/trunk/test/Analysis/taint-generic.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/taint-generic.c?rev=148370&r1=148369&r2=148370&view=diff
==============================================================================
--- cfe/trunk/test/Analysis/taint-generic.c (original)
+++ cfe/trunk/test/Analysis/taint-generic.c Tue Jan 17 20:45:07 2012
@@ -22,6 +22,7 @@
}
char *stpcpy(char *restrict s1, const char *restrict s2);
char *strncpy( char * destination, const char * source, size_t num );
+char *strndup(const char *s, size_t n);
#define BUFSIZE 10
@@ -86,9 +87,18 @@
stpcpy(spcpy, s);
setproctitle(spcpy, 3); // expected-warning {{Uncontrolled Format String}}
+ char *spcpyret;
+ spcpyret = stpcpy(spcpy, s);
+ setproctitle(spcpyret, 3); // expected-warning {{Uncontrolled Format String}}
+
char sncpy[80];
strncpy(sncpy, s, 20);
setproctitle(sncpy, 3); // expected-warning {{Uncontrolled Format String}}
+
+ char *dup;
+ dup = strndup(s, 20);
+ setproctitle(dup, 3); // expected-warning {{Uncontrolled Format String}}
+
}
int system(const char *command);
@@ -97,4 +107,24 @@
char addr[128];
scanf("%s", addr);
system(addr); // expected-warning {{Tainted data passed to a system call}}
+
+ // Test that spintf transfers taint.
+ sprintf(buffer, "/bin/mail %s < /tmp/email", addr);
+ system(buffer); // expected-warning {{Tainted data passed to a system call}}
+}
+void testTaintSystemCall2() {
+ // Test that snpintf transfers taint.
+ char buffern[156];
+ char addr[128];
+ scanf("%s", addr);
+ __builtin_snprintf(buffern, 10, "/bin/mail %s < /tmp/email", addr);
+ system(buffern); // expected-warning {{Tainted data passed to a system call}}
+}
+void testTaintSystemCall3() {
+ char buffern2[156];
+ int numt;
+ char addr[128];
+ scanf("%s %d", addr, &numt);
+ __builtin_snprintf(buffern2, numt, "/bin/mail %s < /tmp/email", "abcd");
+ system(buffern2); // expected-warning {{Tainted data passed to a system call}}
}
More information about the cfe-commits
mailing list