[cfe-commits] r148010 - in /cfe/trunk: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp test/Analysis/taint-generic.c
Anna Zaks
ganna at apple.com
Wed Jan 11 18:22:35 PST 2012
Author: zaks
Date: Wed Jan 11 20:22:34 2012
New Revision: 148010
URL: http://llvm.org/viewvc/llvm-project?rev=148010&view=rev
Log:
[analyzer] Add taint transfer by strcpy & others (part 1).
To simplify the process:
Refactor taint generation checker to simplify passing the
information on which arguments need to be tainted from pre to post
visit.
Todo: We need to factor out the code that sema is using to identify the
string and memcpy functions and use it here and in the CString checker.
Modified:
cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
cfe/trunk/test/Analysis/taint-generic.c
Modified: cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp?rev=148010&r1=148009&r2=148010&view=diff
==============================================================================
--- cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp (original)
+++ cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp Wed Jan 11 20:22:34 2012
@@ -20,6 +20,7 @@
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
+#include <climits>
using namespace clang;
using namespace ento;
@@ -28,48 +29,42 @@
class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
check::PreStmt<CallExpr> > {
public:
- enum TaintOnPreVisitKind {
- /// No taint propagates from pre-visit to post-visit.
- PrevisitNone = 0,
- /// Based on the pre-visit, the return argument of the call
- /// should be tainted.
- PrevisitTaintRet = 1,
- /// Based on the pre-visit, the call can taint values through it's
- /// pointer/reference arguments.
- PrevisitTaintArgs = 2
- };
+ static const unsigned ReturnValueIndex = UINT_MAX;
private:
mutable llvm::OwningPtr<BugType> BT;
void initBugType() const;
- /// Add/propagate taint on a post visit.
- void taintPost(const CallExpr *CE, CheckerContext &C) const;
- /// Add/propagate taint on a pre visit.
- void taintPre(const CallExpr *CE, CheckerContext &C) const;
-
- /// Catch taint related bugs. Check if tainted data is passed to a system
- /// call etc.
+ /// \brief Catch taint related bugs. Check if tainted data is passed to a
+ /// system call etc.
bool checkPre(const CallExpr *CE, CheckerContext &C) const;
- /// Given a pointer argument, get the symbol of the value it contains
+ /// \brief Add taint sources on a pre-visit.
+ void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
+
+ /// \brief Propagate taint generated at pre-visit.
+ bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
+
+ /// \brief Add taint sources on a post visit.
+ void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
+
+ /// \brief Given a pointer argument, get the symbol of the value it contains
/// (points to).
SymbolRef getPointedToSymbol(CheckerContext &C,
const Expr *Arg,
- bool IssueWarning = true) const;
+ bool IssueWarning = false) const;
/// Functions defining the attack surface.
typedef const ProgramState *(GenericTaintChecker::*FnCheck)(const CallExpr *,
CheckerContext &C) const;
const ProgramState *postScanf(const CallExpr *CE, CheckerContext &C) const;
- const ProgramState *postFscanf(const CallExpr *CE, CheckerContext &C) const;
const ProgramState *postRetTaint(const CallExpr *CE, CheckerContext &C) const;
- const ProgramState *postDefault(const CallExpr *CE, CheckerContext &C) const;
/// Taint the scanned input if the file is tainted.
const ProgramState *preFscanf(const CallExpr *CE, CheckerContext &C) const;
/// Taint if any of the arguments are tainted.
const ProgramState *preAnyArgs(const CallExpr *CE, CheckerContext &C) const;
+ const ProgramState *preStrcpy(const CallExpr *CE, CheckerContext &C) const;
/// Check if the region the expression evaluates to is the standard input,
/// and thus, is tainted.
@@ -90,20 +85,17 @@
};
}
-/// Definitions for the checker specific state.
-namespace { struct TaintOnPreVisit {};}
-namespace clang {
-namespace ento {
- /// A flag which is used to pass information from call pre-visit instruction
- /// to the call post-visit. The value is an unsigned, which takes on values
- /// of the TaintOnPreVisitKind enumeration.
- template<>
- struct ProgramStateTrait<TaintOnPreVisit> :
- public ProgramStatePartialTrait<unsigned> {
- static void *GDMIndex() { return GenericTaintChecker::getTag(); }
- };
-}
-}
+/// A set which is used to pass information from call pre-visit instruction
+/// to the call post-visit. The values are unsigned integers, which are either
+/// ReturnValueIndex, or indexes of the pointer/reference argument, which
+/// points to data, which should be tainted on return.
+namespace { struct TaintArgsOnPostVisit{}; }
+namespace clang { namespace ento {
+template<> struct ProgramStateTrait<TaintArgsOnPostVisit>
+ : public ProgramStatePartialTrait<llvm::ImmutableSet<unsigned> > {
+ static void *GDMIndex() { return GenericTaintChecker::getTag(); }
+};
+}}
inline void GenericTaintChecker::initBugType() const {
if (!BT)
@@ -117,23 +109,31 @@
return;
// Add taint second.
- taintPre(CE, C);
+ addSourcesPre(CE, C);
}
void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
CheckerContext &C) const {
- taintPost(CE, C);
+ if (propagateFromPre(CE, C))
+ return;
+ addSourcesPost(CE, C);
}
-void GenericTaintChecker::taintPre(const CallExpr *CE,
- CheckerContext &C) const {
+void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
+ CheckerContext &C) const {
// Set the evaluation function by switching on the callee name.
StringRef Name = C.getCalleeName(CE);
+ if (Name.empty())
+ return;
FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
- .Case("fscanf", &GenericTaintChecker::preFscanf)
.Case("atoi", &GenericTaintChecker::preAnyArgs)
.Case("atol", &GenericTaintChecker::preAnyArgs)
.Case("atoll", &GenericTaintChecker::preAnyArgs)
+ .Case("fscanf", &GenericTaintChecker::preFscanf)
+ .Cases("strcpy", "__builtin___strcpy_chk",
+ "__inline_strcpy_chk", &GenericTaintChecker::preStrcpy)
+ .Cases("stpcpy", "__builtin___stpcpy_chk", &GenericTaintChecker::preStrcpy)
+ .Cases("strncpy", "__builtin___strncpy_chk", &GenericTaintChecker::preStrcpy)
.Default(0);
// Check and evaluate the call.
@@ -146,22 +146,58 @@
C.addTransition(State);
}
-void GenericTaintChecker::taintPost(const CallExpr *CE,
- CheckerContext &C) const {
+bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
+ CheckerContext &C) const {
+ const ProgramState *State = C.getState();
+
+ // Depending on what was tainted at pre-visit, we determined a set of
+ // arguments which should be tainted after the function returns. These are
+ // stored in the state as TaintArgsOnPostVisit set.
+ llvm::ImmutableSet<unsigned> TaintArgs = State->get<TaintArgsOnPostVisit>();
+ for (llvm::ImmutableSet<unsigned>::iterator
+ I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
+ unsigned ArgNum = *I;
+
+ // Special handling for the tainted return value.
+ if (ArgNum == ReturnValueIndex) {
+ State = State->addTaint(CE, C.getLocationContext());
+ continue;
+ }
+
+ // The arguments are pointer arguments. The data they are pointing at is
+ // tainted after the call.
+ const Expr* Arg = CE->getArg(ArgNum);
+ SymbolRef Sym = getPointedToSymbol(C, Arg, true);
+ if (Sym)
+ State = State->addTaint(Sym);
+ }
+
+ // Clear up the taint info from the state.
+ State = State->remove<TaintArgsOnPostVisit>();
+
+ if (State != C.getState()) {
+ C.addTransition(State);
+ return true;
+ }
+ return false;
+}
+
+void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
+ CheckerContext &C) const {
// Define the attack surface.
// Set the evaluation function by switching on the callee name.
StringRef Name = C.getCalleeName(CE);
+ if (Name.empty())
+ return;
FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
.Case("scanf", &GenericTaintChecker::postScanf)
- .Case("fscanf", &GenericTaintChecker::postFscanf)
- .Case("sscanf", &GenericTaintChecker::postFscanf)
// TODO: Add support for vfscanf & family.
.Case("getchar", &GenericTaintChecker::postRetTaint)
.Case("getenv", &GenericTaintChecker::postRetTaint)
.Case("fopen", &GenericTaintChecker::postRetTaint)
.Case("fdopen", &GenericTaintChecker::postRetTaint)
.Case("freopen", &GenericTaintChecker::postRetTaint)
- .Default(&GenericTaintChecker::postDefault);
+ .Default(0);
// If the callee isn't defined, it is not of security concern.
// Check and evaluate the call.
@@ -171,8 +207,6 @@
if (!State)
return;
- assert(State->get<TaintOnPreVisit>() == PrevisitNone &&
- "State has to be cleared.");
C.addTransition(State);
}
@@ -213,6 +247,8 @@
return Val.getAsSymbol();
}
+// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
+// and arg 1 should get taint.
const ProgramState *GenericTaintChecker::preFscanf(const CallExpr *CE,
CheckerContext &C) const {
assert(CE->getNumArgs() >= 2);
@@ -220,8 +256,13 @@
// Check is the file descriptor is tainted.
if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
- isStdin(CE->getArg(0), C))
- return State->set<TaintOnPreVisit>(PrevisitTaintArgs);
+ isStdin(CE->getArg(0), C)) {
+ // All arguments except for the first two should get taint.
+ for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
+ State = State->add<TaintArgsOnPostVisit>(i);
+ return State;
+ }
+
return 0;
}
@@ -233,22 +274,19 @@
const Expr *Arg = CE->getArg(i);
if (State->isTainted(Arg, C.getLocationContext()) ||
State->isTainted(getPointedToSymbol(C, Arg)))
- return State = State->set<TaintOnPreVisit>(PrevisitTaintRet);
+ return State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
}
return 0;
}
-const ProgramState *GenericTaintChecker::postDefault(const CallExpr *CE,
- CheckerContext &C) const {
+const ProgramState * GenericTaintChecker::preStrcpy(const CallExpr *CE,
+ CheckerContext &C) const {
+ assert(CE->getNumArgs() >= 2);
+ const Expr *FromArg = CE->getArg(1);
const ProgramState *State = C.getState();
-
- // Check if we know that the result needs to be tainted based on the
- // pre-visit analysis.
- if (State->get<TaintOnPreVisit>() == PrevisitTaintRet) {
- State = State->addTaint(CE, C.getLocationContext());
- return State->set<TaintOnPreVisit>(PrevisitNone);
- }
-
+ if (State->isTainted(FromArg, C.getLocationContext()) ||
+ State->isTainted(getPointedToSymbol(C, FromArg)))
+ return State = State->add<TaintArgsOnPostVisit>(0);
return 0;
}
@@ -262,34 +300,7 @@
// The arguments are pointer arguments. The data they are pointing at is
// tainted after the call.
const Expr* Arg = CE->getArg(i);
- SymbolRef Sym = getPointedToSymbol(C, Arg);
- if (Sym)
- State = State->addTaint(Sym);
- }
- return State;
-}
-
-/// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
-/// and arg 1 should get taint.
-const ProgramState *GenericTaintChecker::postFscanf(const CallExpr *CE,
- CheckerContext &C) const {
- const ProgramState *State = C.getState();
- assert(CE->getNumArgs() >= 2);
-
- // Fscanf is only tainted if the input file is tainted at pre visit, so
- // check for that first.
- if (State->get<TaintOnPreVisit>() == PrevisitNone)
- return 0;
-
- // Reset the taint state.
- State = State->set<TaintOnPreVisit>(PrevisitNone);
-
- // All arguments except for the first two should get taint.
- for (unsigned int i = 2; i < CE->getNumArgs(); ++i) {
- // The arguments are pointer arguments. The data they are pointing at is
- // tainted after the call.
- const Expr* Arg = CE->getArg(i);
- SymbolRef Sym = getPointedToSymbol(C, Arg);
+ SymbolRef Sym = getPointedToSymbol(C, Arg, true);
if (Sym)
State = State->addTaint(Sym);
}
@@ -373,7 +384,7 @@
// If either the format string content or the pointer itself are tainted, warn.
const ProgramState *State = C.getState();
const Expr *Arg = CE->getArg(ArgNum);
- if (State->isTainted(getPointedToSymbol(C, Arg, false)) ||
+ if (State->isTainted(getPointedToSymbol(C, Arg)) ||
State->isTainted(Arg, C.getLocationContext()))
if (ExplodedNode *N = C.addTransition()) {
initBugType();
Modified: cfe/trunk/test/Analysis/taint-generic.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/taint-generic.c?rev=148010&r1=148009&r2=148010&view=diff
==============================================================================
--- cfe/trunk/test/Analysis/taint-generic.c (original)
+++ cfe/trunk/test/Analysis/taint-generic.c Wed Jan 11 20:22:34 2012
@@ -3,6 +3,26 @@
int scanf(const char *restrict format, ...);
int getchar(void);
+typedef struct _FILE FILE;
+extern FILE *stdin;
+int fscanf(FILE *restrict stream, const char *restrict format, ...);
+int sprintf(char *str, const char *format, ...);
+void setproctitle(const char *fmt, ...);
+typedef __typeof(sizeof(int)) size_t;
+
+// Define string functions. Use builtin for some of them. They all default to
+// the processing in the taint checker.
+#define strcpy(dest, src) \
+ ((__builtin_object_size(dest, 0) != -1ULL) \
+ ? __builtin___strcpy_chk (dest, src, __builtin_object_size(dest, 1)) \
+ : __inline_strcpy_chk(dest, src))
+
+static char *__inline_strcpy_chk (char *dest, const char *src) {
+ return __builtin___strcpy_chk(dest, src, __builtin_object_size(dest, 1));
+}
+char *stpcpy(char *restrict s1, const char *restrict s2);
+char *strncpy( char * destination, const char * source, size_t num );
+
#define BUFSIZE 10
int Buffer[BUFSIZE];
@@ -47,16 +67,23 @@
Buffer[m] = 1; //expected-warning {{Out of bound memory access }}
}
-typedef struct _FILE FILE;
-extern FILE *stdin;
-int fscanf(FILE *restrict stream, const char *restrict format, ...);
-int sprintf(char *str, const char *format, ...);
-void setproctitle(const char *fmt, ...);
-
-void testUncontrolledFormatString() {
+void testUncontrolledFormatString(char **p) {
char s[80];
fscanf(stdin, "%s", s);
char buf[128];
sprintf(buf,s); // expected-warning {{Uncontrolled Format String}}
setproctitle(s, 3); // expected-warning {{Uncontrolled Format String}}
+
+ // Test taint propagation through strcpy and family.
+ char scpy[80];
+ strcpy(scpy, s);
+ sprintf(buf,scpy); // expected-warning {{Uncontrolled Format String}}
+
+ char spcpy[80];
+ stpcpy(spcpy, s);
+ setproctitle(spcpy, 3); // expected-warning {{Uncontrolled Format String}}
+
+ char sncpy[80];
+ strncpy(sncpy, s, 20);
+ setproctitle(sncpy, 3); // expected-warning {{Uncontrolled Format String}}
}
More information about the cfe-commits
mailing list