r355703 - [analyzer] Use the new infrastructure of expressing taint propagation, NFC

Kristof Umann via cfe-commits cfe-commits at lists.llvm.org
Fri Mar 8 07:47:56 PST 2019


Author: szelethus
Date: Fri Mar  8 07:47:56 2019
New Revision: 355703

URL: http://llvm.org/viewvc/llvm-project?rev=355703&view=rev
Log:
[analyzer] Use the new infrastructure of expressing taint propagation, NFC

In D55734, we implemented a far more general way of describing taint propagation
rules for functions, like being able to specify an unlimited amount of
source and destination parameters. Previously, we didn't have a particularly
elegant way of expressing the propagation rules for functions that always return
(either through an out-param or return value) a tainted value. In this patch,
we model these functions similarly to other ones, by assigning them a
TaintPropagationRule that describes that they "create a tainted value out of
nothing".

The socket C function is somewhat special, because for certain parameters (for
example, if we supply localhost as parameter), none of the out-params should
be tainted. For this, we added a general solution of being able to specify
custom taint propagation rules through function pointers.

Patch by Gábor Borsik!

Differential Revision: https://reviews.llvm.org/D59055

Modified:
    cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp

Modified: cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp?rev=355703&r1=355702&r2=355703&view=diff
==============================================================================
--- cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp (original)
+++ cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp Fri Mar  8 07:47:56 2019
@@ -62,9 +62,6 @@ private:
   /// Propagate taint generated at pre-visit.
   bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
 
-  /// Add taint sources on a post visit.
-  void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
-
   /// Check if the region the expression evaluates to is the standard input,
   /// and thus, is tainted.
   static bool isStdin(const Expr *E, CheckerContext &C);
@@ -72,16 +69,6 @@ private:
   /// Given a pointer argument, return the value it points to.
   static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
 
-  /// Functions defining the attack surface.
-  using FnCheck = ProgramStateRef (GenericTaintChecker::*)(
-      const CallExpr *, CheckerContext &C) const;
-  ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
-  ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
-  ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
-
-  /// Taint the scanned input if the file is tainted.
-  ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
-
   /// Check for CWE-134: Uncontrolled Format String.
   static const char MsgUncontrolledFormatString[];
   bool checkUncontrolledFormatString(const CallExpr *CE,
@@ -118,6 +105,9 @@ private:
   struct TaintPropagationRule {
     enum class VariadicType { None, Src, Dst };
 
+    using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
+                                         CheckerContext &C);
+
     /// List of arguments which can be taint sources and should be checked.
     ArgVector SrcArgs;
     /// List of arguments which should be tainted on function return.
@@ -127,16 +117,21 @@ private:
     /// Show when a function has variadic parameters. If it has, it marks all
     /// of them as source or destination.
     VariadicType VarType;
+    /// Special function for tainted source determination. If defined, it can
+    /// override the default behavior.
+    PropagationFuncType PropagationFunc;
 
     TaintPropagationRule()
-        : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None) {}
+        : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
+          PropagationFunc(nullptr) {}
 
     TaintPropagationRule(std::initializer_list<unsigned> &&Src,
                          std::initializer_list<unsigned> &&Dst,
                          VariadicType Var = VariadicType::None,
-                         unsigned VarIndex = InvalidArgIndex)
+                         unsigned VarIndex = InvalidArgIndex,
+                         PropagationFuncType Func = nullptr)
         : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
-          VariadicIndex(VarIndex), VarType(Var) {}
+          VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
 
     /// Get the propagation rule for a given function.
     static TaintPropagationRule
@@ -170,6 +165,10 @@ private:
     /// Pre-process a function which propagates taint according to the
     /// taint rule.
     ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
+
+    // Functions for custom taintedness propagation.
+    static bool postSocket(bool IsTainted, const CallExpr *CE,
+                           CheckerContext &C);
   };
 };
 
@@ -206,25 +205,42 @@ GenericTaintChecker::TaintPropagationRul
   // Check for exact name match for functions without builtin substitutes.
   TaintPropagationRule Rule =
       llvm::StringSwitch<TaintPropagationRule>(Name)
+          // Source functions
+          // TODO: Add support for vfscanf & family.
+          .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex}))
+          .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex}))
+          .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex}))
+          .Case("getch", TaintPropagationRule({}, {ReturnValueIndex}))
+          .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex}))
+          .Case("getchar_unlocked", TaintPropagationRule({}, {ReturnValueIndex}))
+          .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex}))
+          .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
+          .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1))
+          .Case("socket",
+                TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None,
+                                     InvalidArgIndex,
+                                     &TaintPropagationRule::postSocket))
+          .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex}))
+          // Propagating functions
           .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex}))
           .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex}))
           .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex}))
-          .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex}))
           .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex}))
+          .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex}))
+          .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex}))
+          .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2))
+          .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex}))
           .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex}))
+          .Case("getdelim", TaintPropagationRule({3}, {0}))
+          .Case("getline", TaintPropagationRule({2}, {0}))
           .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex}))
-          .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex}))
-          .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex}))
-          .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex}))
-          .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex}))
-          .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex}))
           .Case("pread",
                 TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex}))
-          .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
-          .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex}))
-          .Case("getline", TaintPropagationRule({2}, {0}))
-          .Case("getdelim", TaintPropagationRule({3}, {0}))
-          .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex}))
+          .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex}))
+          .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex}))
+          .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex}))
+          .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex}))
+          .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex}))
           .Default(TaintPropagationRule());
 
   if (!Rule.isNull())
@@ -280,19 +296,21 @@ GenericTaintChecker::TaintPropagationRul
 
 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
                                        CheckerContext &C) const {
-  // Check for errors first.
+  // Check for taintedness related errors first: system call, uncontrolled
+  // format string, tainted buffer size.
   if (checkPre(CE, C))
     return;
 
-  // Add taint second.
+  // Marks the function's arguments and/or return value tainted if it present in
+  // the list.
   addSourcesPre(CE, C);
 }
 
 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
                                         CheckerContext &C) const {
-  if (propagateFromPre(CE, C))
-    return;
-  addSourcesPost(CE, C);
+  // Set the marked values as tainted. The return value only accessible from
+  // checkPostStmt.
+  propagateFromPre(CE, C);
 }
 
 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
@@ -317,13 +335,6 @@ void GenericTaintChecker::addSourcesPre(
     return;
   }
 
-  // Otherwise, check if we have custom pre-processing implemented.
-  FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
-                             .Case("fscanf", &GenericTaintChecker::preFscanf)
-                             .Default(nullptr);
-  // Check and evaluate the call.
-  if (evalFunction)
-    State = (this->*evalFunction)(CE, C);
   if (!State)
     return;
   C.addTransition(State);
@@ -367,43 +378,6 @@ bool GenericTaintChecker::propagateFromP
   return false;
 }
 
-void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
-                                         CheckerContext &C) const {
-  // Define the attack surface.
-  // Set the evaluation function by switching on the callee name.
-  const FunctionDecl *FDecl = C.getCalleeDecl(CE);
-  if (!FDecl || FDecl->getKind() != Decl::Function)
-    return;
-
-  StringRef Name = C.getCalleeName(FDecl);
-  if (Name.empty())
-    return;
-  FnCheck evalFunction =
-      llvm::StringSwitch<FnCheck>(Name)
-          .Case("scanf", &GenericTaintChecker::postScanf)
-          // TODO: Add support for vfscanf & family.
-          .Case("getchar", &GenericTaintChecker::postRetTaint)
-          .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
-          .Case("getenv", &GenericTaintChecker::postRetTaint)
-          .Case("fopen", &GenericTaintChecker::postRetTaint)
-          .Case("fdopen", &GenericTaintChecker::postRetTaint)
-          .Case("freopen", &GenericTaintChecker::postRetTaint)
-          .Case("getch", &GenericTaintChecker::postRetTaint)
-          .Case("wgetch", &GenericTaintChecker::postRetTaint)
-          .Case("socket", &GenericTaintChecker::postSocket)
-          .Default(nullptr);
-
-  // If the callee isn't defined, it is not of security concern.
-  // Check and evaluate the call.
-  ProgramStateRef State = nullptr;
-  if (evalFunction)
-    State = (this->*evalFunction)(CE, C);
-  if (!State)
-    return;
-
-  C.addTransition(State);
-}
-
 bool GenericTaintChecker::checkPre(const CallExpr *CE,
                                    CheckerContext &C) const {
 
@@ -475,6 +449,9 @@ GenericTaintChecker::TaintPropagationRul
     }
   }
 
+  if (PropagationFunc)
+    IsTainted = PropagationFunc(IsTainted, CE, C);
+
   if (!IsTainted)
     return State;
 
@@ -511,63 +488,18 @@ GenericTaintChecker::TaintPropagationRul
   return State;
 }
 
-// If argument 0 (file descriptor) is tainted, all arguments except for arg 0
-// and arg 1 should get taint.
-ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
-                                               CheckerContext &C) const {
-  assert(CE->getNumArgs() >= 2);
-  ProgramStateRef State = C.getState();
-
-  // Check is the file descriptor is tainted.
-  if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
-      isStdin(CE->getArg(0), C)) {
-    // All arguments except for the first two should get taint.
-    for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
-      State = State->add<TaintArgsOnPostVisit>(i);
-    return State;
-  }
-
-  return nullptr;
-}
-
 // If argument 0(protocol domain) is network, the return value should get taint.
-ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
-                                                CheckerContext &C) const {
-  ProgramStateRef State = C.getState();
-  if (CE->getNumArgs() < 3)
-    return State;
-
+bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/,
+                                                           const CallExpr *CE,
+                                                           CheckerContext &C) {
   SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
   // White list the internal communication protocols.
   if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
       DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
-    return State;
-  State = State->addTaint(CE, C.getLocationContext());
-  return State;
-}
-
-ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
-                                               CheckerContext &C) const {
-  ProgramStateRef State = C.getState();
-  if (CE->getNumArgs() < 2)
-    return State;
-
-  // All arguments except for the very first one should get taint.
-  for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
-    // The arguments are pointer arguments. The data they are pointing at is
-    // tainted after the call.
-    const Expr *Arg = CE->getArg(i);
-    Optional<SVal> V = getPointedToSVal(C, Arg);
-    if (V)
-      State = State->addTaint(*V);
-  }
-  return State;
-}
+    return false;
 
-ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
-                                                  CheckerContext &C) const {
-  return C.getState()->addTaint(CE, C.getLocationContext());
+  return true;
 }
 
 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {




More information about the cfe-commits mailing list