[cfe-dev] Analyzer: Extract region-walking behavior from CallAndMessageChecker

Jordy Rose jediknil at belkadan.com
Wed Jun 23 00:05:13 PDT 2010


CallAndMessageChecker currently has a simple check to see if any of the
members of a pass-by-value struct are uninitialized. It handles nested
structs, but not structs containing arrays.

I propose to extract this region-walking behavior out into a new class,
RegionWalker. This would then be the basis for the current pass-by-value
check and a possible new one (see below). My implementation of RegionWalker
is loosely modeled after RecursiveASTVisitor (see attached patch), and
supports both structs and arrays, and both nested.


The additional check I propose is for passing pointers to undefined values
in an argument slot marked as "const". Clearly, the argument value is not
useful, and for a const argument it can't be /set/ by the method either.
(The exception would be "const void *", which is both the type of
Objective-C "id" and sometimes an indicator that the pointer won't be
dereferenced by the callee.)

For the case of an array or struct (such as the source buffer for
strcpy()), we have to be a little more cautious -- the pointer might be
useful if /any/ of the fields are initialized. This is where RegionWalker
would come in.

But that's for a later patch.

Suggestions, comments? Is this overkill? (Especially if the new check
doesn't seem like a good idea.)
Jordy
-------------- next part --------------
Index: lib/Checker/RegionWalker.h
===================================================================
--- lib/Checker/RegionWalker.h	(revision 0)
+++ lib/Checker/RegionWalker.h	(revision 0)
@@ -0,0 +1,143 @@
+//== RegionWalker.h ---------------------------------------------*- C++ -*--==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the RegionWalker interface, which recursively
+//  traverses the subregions of a MemRegion.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_CHECKER_REGIONWALKER_H
+#define LLVM_CLANG_CHECKER_REGIONWALKER_H
+
+#include "clang/Checker/PathSensitive/ValueManager.h"
+
+namespace clang {
+
+/// \brief A visitor that recursively walks the subregions of a given region.
+///
+/// Clients of this visitor should subclass the visitor (providing themselves
+/// as the template argument, using the curiously recurring template pattern)
+/// and override any of the Visit* or Traverse* methods (except Traverse
+/// itself) to customize behavior. Returning "false" from one of these
+/// overridden functions will abort the entire traversal.
+///
+/// RegionWalker currently only traverses records and constant-size arrays,
+/// and treats all other values as opaque.
+/// FIXME: It may be possible to check the extent of a variable-length array
+/// and use that to calculate a size.
+template<typename Derived>
+class RegionWalker {
+private:
+  inline Derived *getDerived() { return static_cast<Derived*>(this); }
+public:
+  // A ValueManager is necessary to walk a region.
+  inline ValueManager &getValueManager() {
+    return getDerived()->getValueManager();
+  }
+
+  // Called to traverse the subregions of the given region, if possible,
+  // by dispatching to the appropriate Traverse* or Visit* method.
+  bool Traverse(const MemRegion *MR, QualType Ty);
+
+  // Visits each element of a constant-sized array (or a region treated as a
+  // constant-sized array).
+  bool TraverseArray(const MemRegion *MR, QualType EleTy,
+                     const llvm::APInt& Count);
+  bool VisitArray(const MemRegion *MR, QualType EleTy,
+                  const llvm::APInt& Count) {
+    return true;
+  }
+
+  // Visits each field of a record type (structure, class, or union).
+  bool TraverseRecord(const MemRegion *MR, QualType Ty);
+  bool VisitRecord(const MemRegion *MR, QualType Ty) { return true; }
+  
+  // Called when the walker reaches a scalar value.
+  bool VisitScalar(const MemRegion *MR, QualType Ty) { return true; }
+  
+  // Called when the walker reaches a region it doesn't know how to traverse.
+  // Clients can override this to take additional action for other types.
+  bool VisitUnknown(const MemRegion *MR, QualType Ty) { return true; }
+
+  // Called before visiting any subregion.
+  bool PreVisitSubregion(const TypedRegion *R) { return true; }
+  bool PostVisitSubregion(const TypedRegion *R) { return true; }
+};
+
+template<typename Derived>
+bool RegionWalker<Derived>::Traverse(const MemRegion *MR, QualType Ty) {
+  if (Ty->isConstantArrayType()) {
+    CanQualType CanTy = Ty->getCanonicalTypeUnqualified();
+    CanQual<ConstantArrayType> AT = CanTy->getAs<ConstantArrayType>();
+    QualType EleTy = AT->getElementType();
+    const llvm::APInt &Count = AT->getSize();
+    return getDerived()->TraverseArray(MR, EleTy, Count);
+  } else if (Ty->isRecordType())
+    return getDerived()->TraverseRecord(MR, Ty);
+  else if (Ty->isScalarType())
+    return getDerived()->VisitScalar(MR, Ty);
+  else
+    return getDerived()->VisitUnknown(MR, Ty);
+}
+
+template<typename Derived>
+bool RegionWalker<Derived>::TraverseArray(const MemRegion *R, QualType EleTy,
+                                          const llvm::APInt& Count) {
+  if (!getDerived()->VisitArray(R, EleTy, Count))
+    return false;
+
+  ValueManager &ValMgr = getValueManager();
+  MemRegionManager &MemMgr = ValMgr.getRegionManager();
+  ASTContext &Ctx = ValMgr.getContext();
+
+  uint64_t Limit = Count.getZExtValue();
+  for (uint64_t Index = 0; Index < Limit; ++Index) {
+    SVal IndexVal = ValMgr.makeArrayIndex(Index);
+    const ElementRegion *ER = MemMgr.getElementRegion(EleTy, IndexVal, R, Ctx);
+
+    if (!getDerived()->PreVisitSubregion(ER))
+      return false;
+    if (!Traverse(ER, EleTy))
+      return false;
+    if (!getDerived()->PostVisitSubregion(ER))
+      return false;
+  }
+
+  return true;
+}
+
+template<typename Derived>
+bool RegionWalker<Derived>::TraverseRecord(const MemRegion *R, QualType Ty) {
+  if (!getDerived()->VisitRecord(R, Ty))
+    return false;
+
+  const RecordDecl *RD = Ty->getAs<RecordType>()->getDecl()->getDefinition();
+  assert(RD && "Referred record has no definition");
+
+  ValueManager &ValMgr = getValueManager();
+  MemRegionManager &MemMgr = ValMgr.getRegionManager();
+  ASTContext &Ctx = ValMgr.getContext();
+
+  for (RecordDecl::field_iterator I = RD->field_begin(),
+       E = RD->field_end(); I != E; ++I) {
+    const FieldRegion *FR = MemMgr.getFieldRegion(*I, R);
+
+    if (!getDerived()->PreVisitSubregion(FR))
+      return false;
+    if (!Traverse(FR, FR->getValueType(Ctx)))
+      return false;
+    if (!getDerived()->PostVisitSubregion(FR))
+      return false;
+  }
+
+  return true;
+}
+
+} // end namespace clang
+
+#endif // LLVM_CLANG_CHECKER_REGIONWALKER_H
Index: lib/Checker/CallAndMessageChecker.cpp
===================================================================
--- lib/Checker/CallAndMessageChecker.cpp	(revision 106610)
+++ lib/Checker/CallAndMessageChecker.cpp	(working copy)
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "GRExprEngineInternalChecks.h"
+#include "RegionWalker.h"
 #include "clang/AST/ParentMap.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Checker/BugReporter/BugType.h"
@@ -59,8 +60,81 @@
       BT = new BuiltinBug(desc);
   }
 };
+
+class UninitializedFieldFinder : public RegionWalker<UninitializedFieldFinder> {
+  CheckerContext &C;
+  llvm::SmallVector<const TypedRegion *, 10> SubregionChain;
+public:
+  UninitializedFieldFinder(CheckerContext &CC) : C(CC) {}
+
+  ValueManager &getValueManager() { return C.getValueManager(); }
+
+  bool PreVisitSubregion(const TypedRegion *R) {
+    SubregionChain.push_back(R);
+    return true;
+  }
+  bool PostVisitSubregion(const TypedRegion *R) {
+    assert(R == SubregionChain.back() && "Push and pop not balanced");
+    SubregionChain.pop_back();
+    return true;
+  }
+
+  bool VisitScalar(const MemRegion *MR, QualType Ty);
+
+  void AppendTrackback(llvm::raw_ostream &os);
+};
 } // end anonymous namespace
 
+bool UninitializedFieldFinder::VisitScalar(const MemRegion *MR, QualType Ty) {
+  // If we find an undefined scalar type, we're done!
+  if (C.getState()->getSVal(MR).isUndef())
+    return false;
+
+  // Otherwise, keep looking.
+  return true;
+}
+
+void UninitializedFieldFinder::AppendTrackback(llvm::raw_ostream &os) {
+  assert(!SubregionChain.empty() && "No trackback to print");
+  if (SubregionChain.size() == 1) {
+    const TypedRegion *TR = SubregionChain[0];
+    if (const FieldRegion *FR = dyn_cast<FieldRegion>(TR)) {
+      os << " (e.g., field: '" << FR->getDecl() << "')";
+    } else if (const ElementRegion *ER = dyn_cast<ElementRegion>(TR)) {
+      // This should never be used, since arrays cannot be passed by value.
+      // It's here in case UninitializedFieldFinder is used for something else
+      // in the future.
+      nonloc::ConcreteInt Index = cast<nonloc::ConcreteInt>(ER->getIndex());
+      os << " (e.g., element " << Index.getValue().getZExtValue() << ")";
+    }
+    // If RegionWalker is ever extended to handle other subregions, they won't
+    // receive a summary here.
+
+  } else {
+    os << " (e.g., via the access chain: '";
+    bool first = true;
+    for (llvm::SmallVectorImpl<const TypedRegion *>::iterator
+         I = SubregionChain.begin(), E = SubregionChain.end(); I!=E; ++I) {
+      if (const FieldRegion *FR = dyn_cast<FieldRegion>(*I)) {
+        if (!first)
+          os << '.';
+        os << FR->getDecl();
+      } else if (const ElementRegion *ER = dyn_cast<ElementRegion>(*I)) {
+        nonloc::ConcreteInt Index = cast<nonloc::ConcreteInt>(ER->getIndex());
+        os << "[" << Index.getValue().getZExtValue() << "]";
+      } else {
+        // If RegionWalker is ever extended to handle other subregions, this
+        // will be better than nothing.
+        if (!first)
+          os << '.';
+        os << "<unknown>";
+      }
+      first = false;
+    }
+    os << "')";
+  }
+}
+
 void clang::RegisterCallAndMessageChecker(GRExprEngine &Eng) {
   Eng.registerCheck(new CallAndMessageChecker());
 }
@@ -99,82 +173,21 @@
 
   if (const nonloc::LazyCompoundVal *LV =
         dyn_cast<nonloc::LazyCompoundVal>(&V)) {
+    const TypedRegion *R = LV->getRegion();
 
-    class FindUninitializedField {
-    public:
-      llvm::SmallVector<const FieldDecl *, 10> FieldChain;
-    private:
-      ASTContext &C;
-      StoreManager &StoreMgr;
-      MemRegionManager &MrMgr;
-      Store store;
-    public:
-      FindUninitializedField(ASTContext &c, StoreManager &storeMgr,
-                             MemRegionManager &mrMgr, Store s)
-      : C(c), StoreMgr(storeMgr), MrMgr(mrMgr), store(s) {}
-
-      bool Find(const TypedRegion *R) {
-        QualType T = R->getValueType(C);
-        if (const RecordType *RT = T->getAsStructureType()) {
-          const RecordDecl *RD = RT->getDecl()->getDefinition();
-          assert(RD && "Referred record has no definition");
-          for (RecordDecl::field_iterator I =
-               RD->field_begin(), E = RD->field_end(); I!=E; ++I) {
-            const FieldRegion *FR = MrMgr.getFieldRegion(*I, R);
-            FieldChain.push_back(*I);
-            T = (*I)->getType();
-            if (T->getAsStructureType()) {
-              if (Find(FR))
-                return true;
-            }
-            else {
-              const SVal &V = StoreMgr.Retrieve(store, loc::MemRegionVal(FR));
-              if (V.isUndef())
-                return true;
-            }
-            FieldChain.pop_back();
-          }
-        }
-
-        return false;
-      }
-    };
-
-    const LazyCompoundValData *D = LV->getCVData();
-    FindUninitializedField F(C.getASTContext(),
-                             C.getState()->getStateManager().getStoreManager(),
-                             C.getValueManager().getRegionManager(),
-                             D->getStore());
-
-    if (F.Find(D->getRegion())) {
+    UninitializedFieldFinder F(C);
+    if (!F.Traverse(R, R->getValueType(C.getASTContext()))) {
       if (ExplodedNode *N = C.GenerateSink()) {
         LazyInit_BT(BT_desc, BT);
         llvm::SmallString<512> Str;
         llvm::raw_svector_ostream os(Str);
         os << "Passed-by-value struct argument contains uninitialized data";
+        F.AppendTrackback(os);
 
-        if (F.FieldChain.size() == 1)
-          os << " (e.g., field: '" << F.FieldChain[0] << "')";
-        else {
-          os << " (e.g., via the field chain: '";
-          bool first = true;
-          for (llvm::SmallVectorImpl<const FieldDecl *>::iterator
-               DI = F.FieldChain.begin(), DE = F.FieldChain.end(); DI!=DE;++DI){
-            if (first)
-              first = false;
-            else
-              os << '.';
-            os << *DI;
-          }
-          os << "')";
-        }
-
         // Generate a report for this bug.
         EnhancedBugReport *R = new EnhancedBugReport(*BT, os.str(), N);
         R->addRange(Ex->getSourceRange());
 
-        // FIXME: enhance track back for uninitialized value for arbitrary
-        // memregions
         C.EmitReport(R);
       }
       return true;
Index: test/Analysis/uninit-vals-ps-region.m
===================================================================
--- test/Analysis/uninit-vals-ps-region.m	(revision 106610)
+++ test/Analysis/uninit-vals-ps-region.m	(working copy)
@@ -59,6 +59,34 @@
   [o passVal:x]; // expected-warning{{Passed-by-value struct argument contains uninitialized data (e.g., field: 'x')}}
 }
 
+struct ArrayCarrier { int x[2]; };
+extern void test_uninit_struct_with_array_aux(struct ArrayCarrier arg);
+void test_uninit_struct_with_array() {
+	struct ArrayCarrier a;
+	test_uninit_struct_with_array_aux(a); // expected-warning{{Passed-by-value struct argument contains uninitialized data (e.g., via the access chain: 'x[0]')}}
+}
+
+struct SimpleNestedStruct { struct TestUninit a; };
+extern void test_uninit_struct_with_nested_aux(struct SimpleNestedStruct arg);
+void test_uninit_struct_with_nested() {
+	struct SimpleNestedStruct a;
+	test_uninit_struct_with_nested_aux(a); // expected-warning{{Passed-by-value struct argument contains uninitialized data (e.g., via the access chain: 'a.x')}}
+}
+
+void test_uninit_struct_with_initializer() {
+	struct TestUninit elem;
+	struct SimpleNestedStruct a = { elem };
+	test_uninit_struct_with_nested_aux(a); // expected-warning{{Passed-by-value struct argument contains uninitialized data (e.g., via the access chain: 'a.x')}}
+}
+
+struct ComplexNestedStruct { struct ArrayCarrier a[2]; };
+extern void test_uninit_complex_struct_aux(struct ComplexNestedStruct arg);
+void test_uninit_complex_struct() {
+	struct ComplexNestedStruct a;
+	a.a[0].x[0] = 1;
+	test_uninit_complex_struct_aux(a); // expected-warning{{Passed-by-value struct argument contains uninitialized data (e.g., via the access chain: 'a[0].x[1]')}}
+}
+
 // Test case from <rdar://problem/7780304>.  That shows an uninitialized value
 // being used in the LHS of a compound assignment.
 void rdar_7780304() {


More information about the cfe-dev mailing list