r261632 - [analyzer] Improve pointer arithmetic checker.

Gabor Horvath via cfe-commits cfe-commits at lists.llvm.org
Tue Feb 23 04:34:40 PST 2016


Author: xazax
Date: Tue Feb 23 06:34:39 2016
New Revision: 261632

URL: http://llvm.org/viewvc/llvm-project?rev=261632&view=rev
Log:
[analyzer] Improve pointer arithmetic checker.

This patch is intended to improve pointer arithmetic checker.
>From now on it only warns when the pointer arithmetic is likely to cause an
error. For example when the pointer points to a single object, or an array of
derived types.

Differential Revision: http://reviews.llvm.org/D14203

Modified:
    cfe/trunk/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp
    cfe/trunk/test/Analysis/PR24184.cpp
    cfe/trunk/test/Analysis/fields.c
    cfe/trunk/test/Analysis/ptr-arith.c
    cfe/trunk/test/Analysis/ptr-arith.cpp
    cfe/trunk/test/Analysis/rdar-6442306-1.m

Modified: cfe/trunk/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp?rev=261632&r1=261631&r2=261632&view=diff
==============================================================================
--- cfe/trunk/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp (original)
+++ cfe/trunk/lib/StaticAnalyzer/Checkers/PointerArithChecker.cpp Tue Feb 23 06:34:39 2016
@@ -13,55 +13,329 @@
 //===----------------------------------------------------------------------===//
 
 #include "ClangSACheckers.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/ExprCXX.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
 #include "clang/StaticAnalyzer/Core/Checker.h"
 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "llvm/ADT/SmallVector.h"
 
 using namespace clang;
 using namespace ento;
 
 namespace {
+enum class AllocKind {
+  SingleObject,
+  Array,
+  Unknown,
+  Reinterpreted // Single object interpreted as an array.
+};
+} // end namespace
+
+namespace llvm {
+template <> struct FoldingSetTrait<AllocKind> {
+  static inline void Profile(AllocKind X, FoldingSetNodeID &ID) {
+    ID.AddInteger(static_cast<int>(X));
+  }
+};
+} // end namespace llvm
+
+namespace {
 class PointerArithChecker
-  : public Checker< check::PreStmt<BinaryOperator> > {
-  mutable std::unique_ptr<BuiltinBug> BT;
+    : public Checker<
+          check::PreStmt<BinaryOperator>, check::PreStmt<UnaryOperator>,
+          check::PreStmt<ArraySubscriptExpr>, check::PreStmt<CastExpr>,
+          check::PostStmt<CastExpr>, check::PostStmt<CXXNewExpr>,
+          check::PostStmt<CallExpr>, check::DeadSymbols> {
+  AllocKind getKindOfNewOp(const CXXNewExpr *NE, const FunctionDecl *FD) const;
+  const MemRegion *getArrayRegion(const MemRegion *Region, bool &Polymorphic,
+                                  AllocKind &AKind, CheckerContext &C) const;
+  const MemRegion *getPointedRegion(const MemRegion *Region,
+                                    CheckerContext &C) const;
+  void reportPointerArithMisuse(const Expr *E, CheckerContext &C,
+                                bool PointedNeeded = false) const;
+  void initAllocIdentifiers(ASTContext &C) const;
+
+  mutable std::unique_ptr<BuiltinBug> BT_pointerArith;
+  mutable std::unique_ptr<BuiltinBug> BT_polyArray;
+  mutable llvm::SmallSet<IdentifierInfo *, 8> AllocFunctions;
 
 public:
-  void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const;
+  void checkPreStmt(const UnaryOperator *UOp, CheckerContext &C) const;
+  void checkPreStmt(const BinaryOperator *BOp, CheckerContext &C) const;
+  void checkPreStmt(const ArraySubscriptExpr *SubExpr, CheckerContext &C) const;
+  void checkPreStmt(const CastExpr *CE, CheckerContext &C) const;
+  void checkPostStmt(const CastExpr *CE, CheckerContext &C) const;
+  void checkPostStmt(const CXXNewExpr *NE, CheckerContext &C) const;
+  void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
+  void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
 };
+} // end namespace
+
+REGISTER_MAP_WITH_PROGRAMSTATE(RegionState, const MemRegion *, AllocKind)
+
+void PointerArithChecker::checkDeadSymbols(SymbolReaper &SR,
+                                           CheckerContext &C) const {
+  // TODO: intentional leak. Some information is garbage collected too early,
+  // see http://reviews.llvm.org/D14203 for further information.
+  /*ProgramStateRef State = C.getState();
+  RegionStateTy RegionStates = State->get<RegionState>();
+  for (RegionStateTy::iterator I = RegionStates.begin(), E = RegionStates.end();
+       I != E; ++I) {
+    if (!SR.isLiveRegion(I->first))
+      State = State->remove<RegionState>(I->first);
+  }
+  C.addTransition(State);*/
 }
 
-void PointerArithChecker::checkPreStmt(const BinaryOperator *B,
-                                       CheckerContext &C) const {
-  if (B->getOpcode() != BO_Sub && B->getOpcode() != BO_Add)
-    return;
+AllocKind PointerArithChecker::getKindOfNewOp(const CXXNewExpr *NE,
+                                              const FunctionDecl *FD) const {
+  // This checker try not to assume anything about placement and overloaded
+  // new to avoid false positives.
+  if (isa<CXXMethodDecl>(FD))
+    return AllocKind::Unknown;
+  if (FD->getNumParams() != 1 || FD->isVariadic())
+    return AllocKind::Unknown;
+  if (NE->isArray())
+    return AllocKind::Array;
 
-  ProgramStateRef state = C.getState();
-  const LocationContext *LCtx = C.getLocationContext();
-  SVal LV = state->getSVal(B->getLHS(), LCtx);
-  SVal RV = state->getSVal(B->getRHS(), LCtx);
+  return AllocKind::SingleObject;
+}
 
-  const MemRegion *LR = LV.getAsRegion();
+const MemRegion *
+PointerArithChecker::getPointedRegion(const MemRegion *Region,
+                                      CheckerContext &C) const {
+  assert(Region);
+  ProgramStateRef State = C.getState();
+  SVal S = State->getSVal(Region);
+  return S.getAsRegion();
+}
+
+/// Checks whether a region is the part of an array.
+/// In case there is a dericed to base cast above the array element, the
+/// Polymorphic output value is set to true. AKind output value is set to the
+/// allocation kind of the inspected region.
+const MemRegion *PointerArithChecker::getArrayRegion(const MemRegion *Region,
+                                                     bool &Polymorphic,
+                                                     AllocKind &AKind,
+                                                     CheckerContext &C) const {
+  assert(Region);
+  while (Region->getKind() == MemRegion::Kind::CXXBaseObjectRegionKind) {
+    Region = Region->getAs<CXXBaseObjectRegion>()->getSuperRegion();
+    Polymorphic = true;
+  }
+  if (Region->getKind() == MemRegion::Kind::ElementRegionKind) {
+    Region = Region->getAs<ElementRegion>()->getSuperRegion();
+  }
+
+  ProgramStateRef State = C.getState();
+  if (const AllocKind *Kind = State->get<RegionState>(Region)) {
+    AKind = *Kind;
+    if (*Kind == AllocKind::Array)
+      return Region;
+    else
+      return nullptr;
+  }
+  // When the region is symbolic and we do not have any information about it,
+  // assume that this is an array to avoid false positives.
+  if (Region->getKind() == MemRegion::Kind::SymbolicRegionKind)
+    return Region;
+
+  // No AllocKind stored and not symbolic, assume that it points to a single
+  // object.
+  return nullptr;
+}
 
-  if (!LR || !RV.isConstant())
+void PointerArithChecker::reportPointerArithMisuse(const Expr *E,
+                                                   CheckerContext &C,
+                                                   bool PointedNeeded) const {
+  SourceRange SR = E->getSourceRange();
+  if (SR.isInvalid())
     return;
 
-  // If pointer arithmetic is done on variables of non-array type, this often
-  // means behavior rely on memory organization, which is dangerous.
-  if (isa<VarRegion>(LR) || isa<CodeTextRegion>(LR) ||
-      isa<CompoundLiteralRegion>(LR)) {
+  ProgramStateRef State = C.getState();
+  const MemRegion *Region =
+      State->getSVal(E, C.getLocationContext()).getAsRegion();
+  if (!Region)
+    return;
+  if (PointedNeeded)
+    Region = getPointedRegion(Region, C);
+  if (!Region)
+    return;
 
+  bool IsPolymorphic = false;
+  AllocKind Kind = AllocKind::Unknown;
+  if (const MemRegion *ArrayRegion =
+          getArrayRegion(Region, IsPolymorphic, Kind, C)) {
+    if (!IsPolymorphic)
+      return;
     if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
-      if (!BT)
-        BT.reset(
-            new BuiltinBug(this, "Dangerous pointer arithmetic",
-                           "Pointer arithmetic done on non-array variables "
-                           "means reliance on memory layout, which is "
-                           "dangerous."));
-      auto R = llvm::make_unique<BugReport>(*BT, BT->getDescription(), N);
-      R->addRange(B->getSourceRange());
+      if (!BT_polyArray)
+        BT_polyArray.reset(new BuiltinBug(
+            this, "Dangerous pointer arithmetic",
+            "Pointer arithmetic on a pointer to base class is dangerous "
+            "because derived and base class may have different size."));
+      auto R = llvm::make_unique<BugReport>(*BT_polyArray,
+                                            BT_polyArray->getDescription(), N);
+      R->addRange(E->getSourceRange());
+      R->markInteresting(ArrayRegion);
       C.emitReport(std::move(R));
     }
+    return;
+  }
+
+  if (Kind == AllocKind::Reinterpreted)
+    return;
+
+  // We might not have enough information about symbolic regions.
+  if (Kind != AllocKind::SingleObject &&
+      Region->getKind() == MemRegion::Kind::SymbolicRegionKind)
+    return;
+
+  if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
+    if (!BT_pointerArith)
+      BT_pointerArith.reset(new BuiltinBug(this, "Dangerous pointer arithmetic",
+                                           "Pointer arithmetic on non-array "
+                                           "variables relies on memory layout, "
+                                           "which is dangerous."));
+    auto R = llvm::make_unique<BugReport>(*BT_pointerArith,
+                                          BT_pointerArith->getDescription(), N);
+    R->addRange(SR);
+    R->markInteresting(Region);
+    C.emitReport(std::move(R));
+  }
+}
+
+void PointerArithChecker::initAllocIdentifiers(ASTContext &C) const {
+  if (!AllocFunctions.empty())
+    return;
+  AllocFunctions.insert(&C.Idents.get("alloca"));
+  AllocFunctions.insert(&C.Idents.get("malloc"));
+  AllocFunctions.insert(&C.Idents.get("realloc"));
+  AllocFunctions.insert(&C.Idents.get("calloc"));
+  AllocFunctions.insert(&C.Idents.get("valloc"));
+}
+
+void PointerArithChecker::checkPostStmt(const CallExpr *CE,
+                                        CheckerContext &C) const {
+  ProgramStateRef State = C.getState();
+  const FunctionDecl *FD = C.getCalleeDecl(CE);
+  if (!FD)
+    return;
+  IdentifierInfo *FunI = FD->getIdentifier();
+  initAllocIdentifiers(C.getASTContext());
+  if (AllocFunctions.count(FunI) == 0)
+    return;
+
+  SVal SV = State->getSVal(CE, C.getLocationContext());
+  const MemRegion *Region = SV.getAsRegion();
+  if (!Region)
+    return;
+  // Assume that C allocation functions allocate arrays to avoid false
+  // positives.
+  // TODO: Add heuristics to distinguish alloc calls that allocates single
+  // objecs.
+  State = State->set<RegionState>(Region, AllocKind::Array);
+  C.addTransition(State);
+}
+
+void PointerArithChecker::checkPostStmt(const CXXNewExpr *NE,
+                                        CheckerContext &C) const {
+  const FunctionDecl *FD = NE->getOperatorNew();
+  if (!FD)
+    return;
+
+  AllocKind Kind = getKindOfNewOp(NE, FD);
+
+  ProgramStateRef State = C.getState();
+  SVal AllocedVal = State->getSVal(NE, C.getLocationContext());
+  const MemRegion *Region = AllocedVal.getAsRegion();
+  if (!Region)
+    return;
+  State = State->set<RegionState>(Region, Kind);
+  C.addTransition(State);
+}
+
+void PointerArithChecker::checkPostStmt(const CastExpr *CE,
+                                        CheckerContext &C) const {
+  if (CE->getCastKind() != CastKind::CK_BitCast)
+    return;
+
+  const Expr *CastedExpr = CE->getSubExpr();
+  ProgramStateRef State = C.getState();
+  SVal CastedVal = State->getSVal(CastedExpr, C.getLocationContext());
+
+  const MemRegion *Region = CastedVal.getAsRegion();
+  if (!Region)
+    return;
+
+  // Suppress reinterpret casted hits.
+  State = State->set<RegionState>(Region, AllocKind::Reinterpreted);
+  C.addTransition(State);
+}
+
+void PointerArithChecker::checkPreStmt(const CastExpr *CE,
+                                       CheckerContext &C) const {
+  if (CE->getCastKind() != CastKind::CK_ArrayToPointerDecay)
+    return;
+
+  const Expr *CastedExpr = CE->getSubExpr();
+  ProgramStateRef State = C.getState();
+  SVal CastedVal = State->getSVal(CastedExpr, C.getLocationContext());
+
+  const MemRegion *Region = CastedVal.getAsRegion();
+  if (!Region)
+    return;
+
+  if (const AllocKind *Kind = State->get<RegionState>(Region)) {
+    if (*Kind == AllocKind::Array || *Kind == AllocKind::Reinterpreted)
+      return;
+  }
+  State = State->set<RegionState>(Region, AllocKind::Array);
+  C.addTransition(State);
+}
+
+void PointerArithChecker::checkPreStmt(const UnaryOperator *UOp,
+                                       CheckerContext &C) const {
+  if (!UOp->isIncrementDecrementOp() || !UOp->getType()->isPointerType())
+    return;
+  reportPointerArithMisuse(UOp->getSubExpr(), C, true);
+}
+
+void PointerArithChecker::checkPreStmt(const ArraySubscriptExpr *SubsExpr,
+                                       CheckerContext &C) const {
+  ProgramStateRef State = C.getState();
+  SVal Idx = State->getSVal(SubsExpr->getIdx(), C.getLocationContext());
+
+  // Indexing with 0 is OK.
+  if (Idx.isZeroConstant())
+    return;
+  reportPointerArithMisuse(SubsExpr->getBase(), C);
+}
+
+void PointerArithChecker::checkPreStmt(const BinaryOperator *BOp,
+                                       CheckerContext &C) const {
+  BinaryOperatorKind OpKind = BOp->getOpcode();
+  if (!BOp->isAdditiveOp() && OpKind != BO_AddAssign && OpKind != BO_SubAssign)
+    return;
+
+  const Expr *Lhs = BOp->getLHS();
+  const Expr *Rhs = BOp->getRHS();
+  ProgramStateRef State = C.getState();
+
+  if (Rhs->getType()->isIntegerType() && Lhs->getType()->isPointerType()) {
+    SVal RHSVal = State->getSVal(Rhs, C.getLocationContext());
+    if (State->isNull(RHSVal).isConstrainedTrue())
+      return;
+    reportPointerArithMisuse(Lhs, C, !BOp->isAdditiveOp());
+  }
+  // The int += ptr; case is not valid C++.
+  if (Lhs->getType()->isIntegerType() && Rhs->getType()->isPointerType()) {
+    SVal LHSVal = State->getSVal(Lhs, C.getLocationContext());
+    if (State->isNull(LHSVal).isConstrainedTrue())
+      return;
+    reportPointerArithMisuse(Rhs, C);
   }
 }
 

Modified: cfe/trunk/test/Analysis/PR24184.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/PR24184.cpp?rev=261632&r1=261631&r2=261632&view=diff
==============================================================================
--- cfe/trunk/test/Analysis/PR24184.cpp (original)
+++ cfe/trunk/test/Analysis/PR24184.cpp Tue Feb 23 06:34:39 2016
@@ -12,7 +12,7 @@ int a;
 typedef int *vcreate_t(int *, DATA_TYPE, int, int);
 void fn1(unsigned, unsigned) {
   char b = 0;
-  for (; 1; a++, &b + a * 0) // expected-warning{{Pointer arithmetic done on non-array variables means reliance on memory layout, which is dangerous}}
+  for (; 1; a++, &b + a * 0)
     ;
 }
 
@@ -55,7 +55,7 @@ void fn1_1(void *p1, const void *p2) { p
 void fn2_1(uint32_t *p1, unsigned char *p2, uint32_t p3) {
   unsigned i = 0;
   for (0; i < p3; i++)
-    fn1_1(p1 + i, p2 + i * 0);    // expected-warning{{Pointer arithmetic done on non-array variables means reliance on memory layout, which is dangerous}}
+    fn1_1(p1 + i, p2 + i * 0);
 }
 
 struct A_1 {

Modified: cfe/trunk/test/Analysis/fields.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/fields.c?rev=261632&r1=261631&r2=261632&view=diff
==============================================================================
--- cfe/trunk/test/Analysis/fields.c (original)
+++ cfe/trunk/test/Analysis/fields.c Tue Feb 23 06:34:39 2016
@@ -16,7 +16,7 @@ struct s {
 
 void f() {
   struct s a;
-  int *p = &(a.n) + 1;
+  int *p = &(a.n) + 1; // expected-warning{{Pointer arithmetic on}}
 }
 
 typedef struct {

Modified: cfe/trunk/test/Analysis/ptr-arith.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/ptr-arith.c?rev=261632&r1=261631&r2=261632&view=diff
==============================================================================
--- cfe/trunk/test/Analysis/ptr-arith.c (original)
+++ cfe/trunk/test/Analysis/ptr-arith.c Tue Feb 23 06:34:39 2016
@@ -52,7 +52,7 @@ void f4() {
 void f5() {
   int x, y;
   int *p;
-  p = &x + 1;  // expected-warning{{Pointer arithmetic done on non-array variables means reliance on memory layout, which is dangerous}}
+  p = &x + 1;  // expected-warning{{Pointer arithmetic on non-array variables relies on memory layout, which is dangerous}}
 
   int a[10];
   p = a + 1; // no-warning
@@ -75,7 +75,7 @@ start:
   clang_analyzer_eval(&a != 0); // expected-warning{{TRUE}}
   clang_analyzer_eval(&a >= 0); // expected-warning{{TRUE}}
   clang_analyzer_eval(&a > 0); // expected-warning{{TRUE}}
-  clang_analyzer_eval((&a - 0) != 0); // expected-warning{{TRUE}} expected-warning{{Pointer arithmetic done on non-array variables}}
+  clang_analyzer_eval((&a - 0) != 0); // expected-warning{{TRUE}}
 
   // LHS is NULL, RHS is non-symbolic
   // The same code is used for labels and non-symbolic values.

Modified: cfe/trunk/test/Analysis/ptr-arith.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/ptr-arith.cpp?rev=261632&r1=261631&r2=261632&view=diff
==============================================================================
--- cfe/trunk/test/Analysis/ptr-arith.cpp (original)
+++ cfe/trunk/test/Analysis/ptr-arith.cpp Tue Feb 23 06:34:39 2016
@@ -1,5 +1,4 @@
-// RUN: %clang_cc1 -analyze -analyzer-checker=core,debug.ExprInspection -verify %s
-// expected-no-diagnostics
+// RUN: %clang_cc1 -Wno-unused-value -std=c++14 -analyze -analyzer-checker=core,debug.ExprInspection,alpha.core.PointerArithm -verify %s
 struct X {
   int *p;
   int zero;
@@ -20,3 +19,82 @@ int test (int *in) {
   return 5/littleX.zero; // no-warning
 }
 
+
+class Base {};
+class Derived : public Base {};
+
+void checkPolymorphicUse() {
+  Derived d[10];
+
+  Base *p = d;
+  ++p; // expected-warning{{Pointer arithmetic on a pointer to base class is dangerous}}
+}
+
+void checkBitCasts() {
+  long l;
+  char *p = (char*)&l;
+  p = p+2;
+}
+
+void checkBasicarithmetic(int i) {
+  int t[10];
+  int *p = t;
+  ++p;
+  int a = 5;
+  p = &a;
+  ++p; // expected-warning{{Pointer arithmetic on non-array variables relies on memory layout, which is dangerous}}
+  p = p + 2; // expected-warning{{}}
+  p = 2 + p; // expected-warning{{}}
+  p += 2; // expected-warning{{}}
+  a += p[2]; // expected-warning{{}}
+  p = i*0 + p;
+  p = p + i*0;
+  p += i*0;
+}
+
+void checkArithOnSymbolic(int*p) {
+  ++p;
+  p = p + 2;
+  p = 2 + p;
+  p += 2;
+  (void)p[2];
+}
+
+struct S {
+  int t[10];
+};
+
+void arrayInStruct() {
+  S s;
+  int * p = s.t;
+  ++p;
+  S *sp = new S;
+  p = sp->t;
+  ++p;
+  delete sp;
+}
+
+void checkNew() {
+  int *p = new int;
+  p[1] = 1; // expected-warning{{}}
+}
+
+void InitState(int* state) {
+    state[1] = 1; // expected-warning{{}}
+}
+
+int* getArray(int size) {
+    if (size == 0)
+      return new int;
+    return new int[5];
+}
+
+void checkConditionalArray() {
+    int* maybeArray = getArray(0);
+    InitState(maybeArray);
+}
+
+void checkMultiDimansionalArray() {
+  int a[5][5];
+   *(*(a+1)+2) = 2;
+}

Modified: cfe/trunk/test/Analysis/rdar-6442306-1.m
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/rdar-6442306-1.m?rev=261632&r1=261631&r2=261632&view=diff
==============================================================================
--- cfe/trunk/test/Analysis/rdar-6442306-1.m (original)
+++ cfe/trunk/test/Analysis/rdar-6442306-1.m Tue Feb 23 06:34:39 2016
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -analyze -analyzer-checker=core,alpha.core %s -analyzer-store=region -verify
+// RUN: %clang_cc1 -analyze -analyzer-checker=core,alpha.core -analyzer-disable-checker=alpha.core.PointerArithm %s -analyzer-store=region -verify
 // expected-no-diagnostics
 
 typedef int bar_return_t;




More information about the cfe-commits mailing list