r304162 - [analyzer] Support partially tainted records.

Artem Dergachev via cfe-commits cfe-commits at lists.llvm.org
Mon May 29 08:42:56 PDT 2017


Author: dergachev
Date: Mon May 29 10:42:56 2017
New Revision: 304162

URL: http://llvm.org/viewvc/llvm-project?rev=304162&view=rev
Log:
[analyzer] Support partially tainted records.

The analyzer's taint analysis can now reason about structures or arrays
originating from taint sources in which only certain sections are tainted.

In particular, it also benefits modeling functions like read(), which may
read tainted data into a section of a structure, but RegionStore is incapable of
expressing the fact that the rest of the structure remains intact, even if we
try to model read() directly.

Patch by Vlad Tsyrklevich!

Differential revision: https://reviews.llvm.org/D28445

Modified:
    cfe/trunk/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
    cfe/trunk/include/clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h
    cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
    cfe/trunk/lib/StaticAnalyzer/Core/ProgramState.cpp
    cfe/trunk/lib/StaticAnalyzer/Core/RegionStore.cpp
    cfe/trunk/test/Analysis/taint-generic.c

Modified: cfe/trunk/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h?rev=304162&r1=304161&r2=304162&view=diff
==============================================================================
--- cfe/trunk/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h (original)
+++ cfe/trunk/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h Mon May 29 10:42:56 2017
@@ -43,6 +43,9 @@ typedef std::unique_ptr<ConstraintManage
     ProgramStateManager &, SubEngine *);
 typedef std::unique_ptr<StoreManager>(*StoreManagerCreator)(
     ProgramStateManager &);
+typedef llvm::ImmutableMap<const SubRegion*, TaintTagType> TaintedSubRegions;
+typedef llvm::ImmutableMapRef<const SubRegion*, TaintTagType>
+  TaintedSubRegionsRef;
 
 //===----------------------------------------------------------------------===//
 // ProgramStateTrait - Traits used by the Generic Data Map of a ProgramState.
@@ -87,6 +90,7 @@ private:
   Store store;               // Maps a location to its current value.
   GenericDataMap   GDM;      // Custom data stored by a client of this class.
   unsigned refCount;
+  TaintedSubRegions::Factory TSRFactory;
 
   /// makeWithStore - Return a ProgramState with the same values as the current
   ///  state with the exception of using the specified Store.
@@ -343,6 +347,9 @@ public:
   ProgramStateRef addTaint(const Stmt *S, const LocationContext *LCtx,
                                TaintTagType Kind = TaintTagGeneric) const;
 
+  /// Create a new state in which the value is marked as tainted.
+  ProgramStateRef addTaint(SVal V, TaintTagType Kind = TaintTagGeneric) const;
+
   /// Create a new state in which the symbol is marked as tainted.
   ProgramStateRef addTaint(SymbolRef S,
                                TaintTagType Kind = TaintTagGeneric) const;
@@ -351,6 +358,14 @@ public:
   ProgramStateRef addTaint(const MemRegion *R,
                                TaintTagType Kind = TaintTagGeneric) const;
 
+  /// Create a new state in a which a sub-region of a given symbol is tainted.
+  /// This might be necessary when referring to regions that can not have an
+  /// individual symbol, e.g. if they are represented by the default binding of
+  /// a LazyCompoundVal.
+  ProgramStateRef addPartialTaint(SymbolRef ParentSym,
+                                  const SubRegion *SubRegion,
+                                  TaintTagType Kind = TaintTagGeneric) const;
+
   /// Check if the statement is tainted in the current state.
   bool isTainted(const Stmt *S, const LocationContext *LCtx,
                  TaintTagType Kind = TaintTagGeneric) const;

Modified: cfe/trunk/include/clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h?rev=304162&r1=304161&r2=304162&view=diff
==============================================================================
--- cfe/trunk/include/clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h (original)
+++ cfe/trunk/include/clang/StaticAnalyzer/Core/PathSensitive/TaintManager.h Mon May 29 10:42:56 2017
@@ -35,6 +35,16 @@ template<> struct ProgramStateTrait<Tain
   static void *GDMIndex() { static int index = 0; return &index; }
 };
 
+/// The GDM component mapping derived symbols' parent symbols to their
+/// underlying regions. This is used to efficiently check whether a symbol is
+/// tainted when it represents a sub-region of a tainted symbol.
+struct DerivedSymTaint {};
+typedef llvm::ImmutableMap<SymbolRef, TaintedSubRegionsRef> DerivedSymTaintImpl;
+template<> struct ProgramStateTrait<DerivedSymTaint>
+    :  public ProgramStatePartialTrait<DerivedSymTaintImpl> {
+  static void *GDMIndex() { static int index; return &index; }
+};
+
 class TaintManager {
 
   TaintManager() {}

Modified: cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp?rev=304162&r1=304161&r2=304162&view=diff
==============================================================================
--- cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp (original)
+++ cfe/trunk/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp Mon May 29 10:42:56 2017
@@ -65,21 +65,8 @@ private:
   /// and thus, is tainted.
   static bool isStdin(const Expr *E, CheckerContext &C);
 
-  /// This is called from getPointedToSymbol() to resolve symbol references for
-  /// the region underlying a LazyCompoundVal. This is the default binding
-  /// for the LCV, which could be a conjured symbol from a function call that
-  /// initialized the region. It only returns the conjured symbol if the LCV
-  /// covers the entire region, e.g. we avoid false positives by not returning
-  /// a default bindingc for an entire struct if the symbol for only a single
-  /// field or element within it is requested.
-  // TODO: Return an appropriate symbol for sub-fields/elements of an LCV so
-  // that they are also appropriately tainted.
-  static SymbolRef getLCVSymbol(CheckerContext &C,
-                                nonloc::LazyCompoundVal &LCV);
-
-  /// \brief Given a pointer argument, get the symbol of the value it contains
-  /// (points to).
-  static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
+  /// \brief Given a pointer argument, return the value it points to.
+  static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
 
   /// Functions defining the attack surface.
   typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
@@ -186,9 +173,14 @@ private:
     static inline bool isTaintedOrPointsToTainted(const Expr *E,
                                                   ProgramStateRef State,
                                                   CheckerContext &C) {
-      return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
-              (E->getType().getTypePtr()->isPointerType() &&
-               State->isTainted(getPointedToSymbol(C, E))));
+      if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C))
+        return true;
+
+      if (!E->getType().getTypePtr()->isPointerType())
+        return false;
+
+      Optional<SVal> V = getPointedToSVal(C, E);
+      return (V && State->isTainted(*V));
     }
 
     /// \brief Pre-process a function which propagates taint according to the
@@ -400,9 +392,9 @@ bool GenericTaintChecker::propagateFromP
     if (CE->getNumArgs() < (ArgNum + 1))
       return false;
     const Expr* Arg = CE->getArg(ArgNum);
-    SymbolRef Sym = getPointedToSymbol(C, Arg);
-    if (Sym)
-      State = State->addTaint(Sym);
+    Optional<SVal> V = getPointedToSVal(C, Arg);
+    if (V)
+      State = State->addTaint(*V);
   }
 
   // Clear up the taint info from the state.
@@ -473,47 +465,20 @@ bool GenericTaintChecker::checkPre(const
   return false;
 }
 
-SymbolRef GenericTaintChecker::getLCVSymbol(CheckerContext &C,
-                                            nonloc::LazyCompoundVal &LCV) {
-  StoreManager &StoreMgr = C.getStoreManager();
-
-  // getLCVSymbol() is reached in a PostStmt so we can always expect a default
-  // binding to exist if one is present.
-  if (Optional<SVal> binding = StoreMgr.getDefaultBinding(LCV)) {
-    SymbolRef Sym = binding->getAsSymbol();
-    if (!Sym)
-      return nullptr;
-
-    // If the LCV covers an entire base region return the default conjured symbol.
-    if (LCV.getRegion() == LCV.getRegion()->getBaseRegion())
-      return Sym;
-  }
-
-  // Otherwise, return a nullptr as there's not yet a functional way to taint
-  // sub-regions of LCVs.
-  return nullptr;
-}
-
-SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
-                                                  const Expr* Arg) {
+Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
+                                            const Expr* Arg) {
   ProgramStateRef State = C.getState();
   SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
   if (AddrVal.isUnknownOrUndef())
-    return nullptr;
+    return None;
 
   Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
   if (!AddrLoc)
-    return nullptr;
+    return None;
 
   const PointerType *ArgTy =
     dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
-  SVal Val = State->getSVal(*AddrLoc,
-                            ArgTy ? ArgTy->getPointeeType(): QualType());
-
-  if (auto LCV = Val.getAs<nonloc::LazyCompoundVal>())
-    return getLCVSymbol(C, *LCV);
-
-  return Val.getAsSymbol();
+  return State->getSVal(*AddrLoc, ArgTy ? ArgTy->getPointeeType(): QualType());
 }
 
 ProgramStateRef
@@ -633,9 +598,9 @@ ProgramStateRef GenericTaintChecker::pos
     // The arguments are pointer arguments. The data they are pointing at is
     // tainted after the call.
     const Expr* Arg = CE->getArg(i);
-        SymbolRef Sym = getPointedToSymbol(C, Arg);
-    if (Sym)
-      State = State->addTaint(Sym);
+    Optional<SVal> V = getPointedToSVal(C, Arg);
+    if (V)
+      State = State->addTaint(*V);
   }
   return State;
 }
@@ -710,10 +675,10 @@ bool GenericTaintChecker::generateReport
 
   // Check for taint.
   ProgramStateRef State = C.getState();
-  const SymbolRef PointedToSym = getPointedToSymbol(C, E);
+  Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
   SVal TaintedSVal;
-  if (State->isTainted(PointedToSym))
-    TaintedSVal = nonloc::SymbolVal(PointedToSym);
+  if (PointedToSVal && State->isTainted(*PointedToSVal))
+    TaintedSVal = *PointedToSVal;
   else if (State->isTainted(E, C.getLocationContext()))
     TaintedSVal = C.getSVal(E);
   else

Modified: cfe/trunk/lib/StaticAnalyzer/Core/ProgramState.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/StaticAnalyzer/Core/ProgramState.cpp?rev=304162&r1=304161&r2=304162&view=diff
==============================================================================
--- cfe/trunk/lib/StaticAnalyzer/Core/ProgramState.cpp (original)
+++ cfe/trunk/lib/StaticAnalyzer/Core/ProgramState.cpp Mon May 29 10:42:56 2017
@@ -644,15 +644,33 @@ ProgramStateRef ProgramState::addTaint(c
   if (const Expr *E = dyn_cast_or_null<Expr>(S))
     S = E->IgnoreParens();
 
-  SymbolRef Sym = getSVal(S, LCtx).getAsSymbol();
+  return addTaint(getSVal(S, LCtx), Kind);
+}
+
+ProgramStateRef ProgramState::addTaint(SVal V,
+                                       TaintTagType Kind) const {
+  SymbolRef Sym = V.getAsSymbol();
   if (Sym)
     return addTaint(Sym, Kind);
 
-  const MemRegion *R = getSVal(S, LCtx).getAsRegion();
-  addTaint(R, Kind);
+  // If the SVal represents a structure, try to mass-taint all values within the
+  // structure. For now it only works efficiently on lazy compound values that
+  // were conjured during a conservative evaluation of a function - either as
+  // return values of functions that return structures or arrays by value, or as
+  // values of structures or arrays passed into the function by reference,
+  // directly or through pointer aliasing. Such lazy compound values are
+  // characterized by having exactly one binding in their captured store within
+  // their parent region, which is a conjured symbol default-bound to the base
+  // region of the parent region.
+  if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
+    if (Optional<SVal> binding = getStateManager().StoreMgr->getDefaultBinding(*LCV)) {
+      if (SymbolRef Sym = binding->getAsSymbol())
+        return addPartialTaint(Sym, LCV->getRegion(), Kind);
+    }
+  }
 
-  // Cannot add taint, so just return the state.
-  return this;
+  const MemRegion *R = V.getAsRegion();
+  return addTaint(R, Kind);
 }
 
 ProgramStateRef ProgramState::addTaint(const MemRegion *R,
@@ -674,6 +692,28 @@ ProgramStateRef ProgramState::addTaint(S
   return NewState;
 }
 
+ProgramStateRef ProgramState::addPartialTaint(SymbolRef ParentSym,
+                                              const SubRegion *SubRegion,
+                                              TaintTagType Kind) const {
+  // Ignore partial taint if the entire parent symbol is already tainted.
+  if (contains<TaintMap>(ParentSym) && *get<TaintMap>(ParentSym) == Kind)
+    return this;
+
+  // Partial taint applies if only a portion of the symbol is tainted.
+  if (SubRegion == SubRegion->getBaseRegion())
+    return addTaint(ParentSym, Kind);
+
+  TaintedSubRegionsRef TaintedSubRegions(0, TSRFactory.getTreeFactory());
+  if (const TaintedSubRegionsRef *SavedTaintedRegions =
+        get<DerivedSymTaint>(ParentSym))
+    TaintedSubRegions = *SavedTaintedRegions;
+
+  TaintedSubRegions = TaintedSubRegions.add(SubRegion, Kind);
+  ProgramStateRef NewState = set<DerivedSymTaint>(ParentSym, TaintedSubRegions);
+  assert(NewState);
+  return NewState;
+}
+
 bool ProgramState::isTainted(const Stmt *S, const LocationContext *LCtx,
                              TaintTagType Kind) const {
   if (const Expr *E = dyn_cast_or_null<Expr>(S))
@@ -714,31 +754,54 @@ bool ProgramState::isTainted(SymbolRef S
     return false;
 
   // Traverse all the symbols this symbol depends on to see if any are tainted.
-  bool Tainted = false;
   for (SymExpr::symbol_iterator SI = Sym->symbol_begin(), SE =Sym->symbol_end();
        SI != SE; ++SI) {
     if (!isa<SymbolData>(*SI))
       continue;
 
-    const TaintTagType *Tag = get<TaintMap>(*SI);
-    Tainted = (Tag && *Tag == Kind);
-
-    // If this is a SymbolDerived with a tainted parent, it's also tainted.
-    if (const SymbolDerived *SD = dyn_cast<SymbolDerived>(*SI))
-      Tainted = Tainted || isTainted(SD->getParentSymbol(), Kind);
+    if (const TaintTagType *Tag = get<TaintMap>(*SI)) {
+      if (*Tag == Kind)
+        return true;
+    }
+
+    if (const SymbolDerived *SD = dyn_cast<SymbolDerived>(*SI)) {
+      // If this is a SymbolDerived with a tainted parent, it's also tainted.
+      if (isTainted(SD->getParentSymbol(), Kind))
+        return true;
+
+      // If this is a SymbolDerived with the same parent symbol as another
+      // tainted SymbolDerived and a region that's a sub-region of that tainted
+      // symbol, it's also tainted.
+      if (const TaintedSubRegionsRef *SymRegions =
+            get<DerivedSymTaint>(SD->getParentSymbol())) {
+        const TypedValueRegion *R = SD->getRegion();
+        for (TaintedSubRegionsRef::iterator I = SymRegions->begin(),
+                                            E = SymRegions->end();
+             I != E; ++I) {
+          // FIXME: The logic to identify tainted regions could be more
+          // complete. For example, this would not currently identify
+          // overlapping fields in a union as tainted. To identify this we can
+          // check for overlapping/nested byte offsets.
+          if (Kind == I->second &&
+              (R == I->first || R->isSubRegionOf(I->first)))
+            return true;
+        }
+      }
+    }
 
     // If memory region is tainted, data is also tainted.
-    if (const SymbolRegionValue *SRV = dyn_cast<SymbolRegionValue>(*SI))
-      Tainted = Tainted || isTainted(SRV->getRegion(), Kind);
-
-    // If If this is a SymbolCast from a tainted value, it's also tainted.
-    if (const SymbolCast *SC = dyn_cast<SymbolCast>(*SI))
-      Tainted = Tainted || isTainted(SC->getOperand(), Kind);
-
-    if (Tainted)
-      return true;
+    if (const SymbolRegionValue *SRV = dyn_cast<SymbolRegionValue>(*SI)) {
+      if (isTainted(SRV->getRegion(), Kind))
+        return true;
+    }
+
+    // If this is a SymbolCast from a tainted value, it's also tainted.
+    if (const SymbolCast *SC = dyn_cast<SymbolCast>(*SI)) {
+      if (isTainted(SC->getOperand(), Kind))
+        return true;
+    }
   }
 
-  return Tainted;
+  return false;
 }
 

Modified: cfe/trunk/lib/StaticAnalyzer/Core/RegionStore.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/StaticAnalyzer/Core/RegionStore.cpp?rev=304162&r1=304161&r2=304162&view=diff
==============================================================================
--- cfe/trunk/lib/StaticAnalyzer/Core/RegionStore.cpp (original)
+++ cfe/trunk/lib/StaticAnalyzer/Core/RegionStore.cpp Mon May 29 10:42:56 2017
@@ -496,7 +496,10 @@ public: // Part of public interface to c
 
   Optional<SVal> getDefaultBinding(Store S, const MemRegion *R) override {
     RegionBindingsRef B = getRegionBindings(S);
-    return B.getDefaultBinding(R);
+    // Default bindings are always applied over a base region so look up the
+    // base region's default binding, otherwise the lookup will fail when R
+    // is at an offset from R->getBaseRegion().
+    return B.getDefaultBinding(R->getBaseRegion());
   }
 
   SVal getBinding(RegionBindingsConstRef B, Loc L, QualType T = QualType());

Modified: cfe/trunk/test/Analysis/taint-generic.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/taint-generic.c?rev=304162&r1=304161&r2=304162&view=diff
==============================================================================
--- cfe/trunk/test/Analysis/taint-generic.c (original)
+++ cfe/trunk/test/Analysis/taint-generic.c Mon May 29 10:42:56 2017
@@ -192,20 +192,41 @@ void testStruct() {
 
 void testStructArray() {
   struct {
-    char buf[16];
-    struct {
-      int length;
-    } st[1];
-  } tainted;
+    int length;
+  } tainted[4];
 
-  char buffer[16];
+  char dstbuf[16], srcbuf[16];
   int sock;
 
   sock = socket(AF_INET, SOCK_STREAM, 0);
-  read(sock, &tainted.buf[0], sizeof(tainted.buf));
-  read(sock, &tainted.st[0], sizeof(tainted.st));
-  // FIXME: tainted.st[0].length should be marked tainted
-  __builtin_memcpy(buffer, tainted.buf, tainted.st[0].length); // no-warning
+  __builtin_memset(srcbuf, 0, sizeof(srcbuf));
+
+  read(sock, &tainted[0], sizeof(tainted));
+  __builtin_memcpy(dstbuf, srcbuf, tainted[0].length); // expected-warning {{Untrusted data is used to specify the buffer size}}
+
+  __builtin_memset(&tainted, 0, sizeof(tainted));
+  read(sock, &tainted, sizeof(tainted));
+  __builtin_memcpy(dstbuf, srcbuf, tainted[0].length); // expected-warning {{Untrusted data is used to specify the buffer size}}
+
+  __builtin_memset(&tainted, 0, sizeof(tainted));
+  // If we taint element 1, we should not raise an alert on taint for element 0 or element 2
+  read(sock, &tainted[1], sizeof(tainted));
+  __builtin_memcpy(dstbuf, srcbuf, tainted[0].length); // no-warning
+  __builtin_memcpy(dstbuf, srcbuf, tainted[2].length); // no-warning
+}
+
+void testUnion() {
+  union {
+    int x;
+    char y[4];
+  } tainted;
+
+  char buffer[4];
+
+  int sock = socket(AF_INET, SOCK_STREAM, 0);
+  read(sock, &tainted.y, sizeof(tainted.y));
+  // FIXME: overlapping regions aren't detected by isTainted yet
+  __builtin_memcpy(buffer, tainted.y, tainted.x);
 }
 
 int testDivByZero() {




More information about the cfe-commits mailing list