r179767 - [analyzer] "Force" LazyCompoundVals on bind when they are simple enough.

Jordan Rose jordan_rose at apple.com
Thu Apr 18 09:33:46 PDT 2013


Author: jrose
Date: Thu Apr 18 11:33:46 2013
New Revision: 179767

URL: http://llvm.org/viewvc/llvm-project?rev=179767&view=rev
Log:
[analyzer] "Force" LazyCompoundVals on bind when they are simple enough.

The analyzer uses LazyCompoundVals to represent rvalues of aggregate types,
most importantly structs and arrays. This allows us to efficiently copy
around an entire struct, rather than doing a memberwise load every time a
struct rvalue is encountered. This can also keep memory usage down by
allowing several structs to "share" the same snapshotted bindings.

However, /lookup/ through LazyCompoundVals can be expensive, especially
since they can end up chaining back to the original value. While we try
to reuse LazyCompoundVals whenever it's safe, and cache information about
this transitivity, the fact is it's sometimes just not a good idea to
perpetuate LazyCompoundVals -- the tradeoffs just aren't worth it.

This commit changes RegionStore so that binding a LazyCompoundVal to struct
will do a memberwise copy if the struct is simple enough. Today's definition
of "simple enough" is "up to N scalar members" (see below), but that could
easily be changed in the future. This is enough to bring the test case in
PR15697 back down to a manageable analysis time (within 20% of its original
time, in an unfair test where the new analyzer is not compiled with LTO).

The actual value of "N" is controlled by a new -analyzer-config option,
'region-store-small-struct-limit'. It defaults to "2", meaning structs with
zero, one, or two scalar members will be considered "simple enough" for
this code path.

It's worth noting that a more straightforward implementation would do this
on load, not on bind, and make use of the structure we already have for this:
CompoundVal. A long time ago, this was actually how RegionStore modeled
aggregate-to-aggregate copies, but today it's only used for compound literals.
Unfortunately, it seems that we've special-cased LazyCompoundVal in certain
places (such as liveness checks) but failed to similarly special-case
CompoundVal in all of them. Until we're confident that CompoundVal is
handled properly everywhere, this solution is safer, since the entire
optimization is just an implementation detail of RegionStore.

<rdar://problem/13599304>

Modified:
    cfe/trunk/lib/StaticAnalyzer/Core/RegionStore.cpp
    cfe/trunk/test/Analysis/analyzer-config.c
    cfe/trunk/test/Analysis/analyzer-config.cpp
    cfe/trunk/test/Analysis/uninit-vals.m

Modified: cfe/trunk/lib/StaticAnalyzer/Core/RegionStore.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/StaticAnalyzer/Core/RegionStore.cpp?rev=179767&r1=179766&r2=179767&view=diff
==============================================================================
--- cfe/trunk/lib/StaticAnalyzer/Core/RegionStore.cpp (original)
+++ cfe/trunk/lib/StaticAnalyzer/Core/RegionStore.cpp Thu Apr 18 11:33:46 2013
@@ -19,10 +19,12 @@
 #include "clang/Analysis/Analyses/LiveVariables.h"
 #include "clang/Analysis/AnalysisContext.h"
 #include "clang/Basic/TargetInfo.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h"
 #include "llvm/ADT/ImmutableList.h"
 #include "llvm/ADT/ImmutableMap.h"
 #include "llvm/ADT/Optional.h"
@@ -323,6 +325,7 @@ class invalidateRegionsWorker;
 class RegionStoreManager : public StoreManager {
 public:
   const RegionStoreFeatures Features;
+
   RegionBindings::Factory RBFactory;
   mutable ClusterBindings::Factory CBFactory;
 
@@ -332,6 +335,16 @@ private:
                          SValListTy> LazyBindingsMapTy;
   LazyBindingsMapTy LazyBindingsMap;
 
+  /// The largest number of fields a struct can have and still be
+  /// considered "small".
+  ///
+  /// This is currently used to decide whether or not it is worth "forcing" a
+  /// LazyCompoundVal on bind.
+  ///
+  /// This is controlled by 'region-store-small-struct-limit' option.
+  /// To disable all small-struct-dependent behavior, set the option to "0".
+  unsigned SmallStructLimit;
+
   /// \brief A helper used to populate the work list with the given set of
   /// regions.
   void populateWorkList(invalidateRegionsWorker &W,
@@ -342,7 +355,14 @@ private:
 public:
   RegionStoreManager(ProgramStateManager& mgr, const RegionStoreFeatures &f)
     : StoreManager(mgr), Features(f),
-      RBFactory(mgr.getAllocator()), CBFactory(mgr.getAllocator()) {}
+      RBFactory(mgr.getAllocator()), CBFactory(mgr.getAllocator()),
+      SmallStructLimit(0) {
+    if (SubEngine *Eng = StateMgr.getOwningEngine()) {
+      AnalyzerOptions &Options = Eng->getAnalysisManager().options;
+      SmallStructLimit =
+        Options.getOptionAsInteger("region-store-small-struct-limit", 2);
+    }
+  }
 
 
   /// setImplicitDefaultValue - Set the default binding for the provided
@@ -423,6 +443,21 @@ public: // Part of public interface to c
                                const CompoundLiteralExpr *CL,
                                const LocationContext *LC, SVal V);
 
+  /// Attempt to extract the fields of \p LCV and bind them to the struct region
+  /// \p R.
+  ///
+  /// This path is used when it seems advantageous to "force" loading the values
+  /// within a LazyCompoundVal to bind memberwise to the struct region, rather
+  /// than using a Default binding at the base of the entire region. This is a
+  /// heuristic attempting to avoid building long chains of LazyCompoundVals.
+  ///
+  /// \returns The updated store bindings, or \c None if binding non-lazily
+  ///          would be too expensive.
+  Optional<RegionBindingsRef> tryBindSmallStruct(RegionBindingsConstRef B,
+                                                 const TypedValueRegion *R,
+                                                 const RecordDecl *RD,
+                                                 nonloc::LazyCompoundVal LCV);
+
   /// BindStruct - Bind a compound value to a structure.
   RegionBindingsRef bindStruct(RegionBindingsConstRef B,
                                const TypedValueRegion* R, SVal V);
@@ -2013,7 +2048,7 @@ RegionStoreManager::bindArray(RegionBind
     else if (ElementTy->isArrayType())
       NewB = bindArray(NewB, ER, *VI);
     else
-      NewB = bind(NewB, svalBuilder.makeLoc(ER), *VI);
+      NewB = bind(NewB, loc::MemRegionVal(ER), *VI);
   }
 
   // If the init list is shorter than the array length, set the
@@ -2054,14 +2089,56 @@ RegionBindingsRef RegionStoreManager::bi
     
     NonLoc Idx = svalBuilder.makeArrayIndex(index);
     const ElementRegion *ER = MRMgr.getElementRegion(ElemType, Idx, R, Ctx);
-    
+
     if (ElemType->isArrayType())
       NewB = bindArray(NewB, ER, *VI);
     else if (ElemType->isStructureOrClassType())
       NewB = bindStruct(NewB, ER, *VI);
     else
-      NewB = bind(NewB, svalBuilder.makeLoc(ER), *VI);
+      NewB = bind(NewB, loc::MemRegionVal(ER), *VI);
+  }
+  return NewB;
+}
+
+Optional<RegionBindingsRef>
+RegionStoreManager::tryBindSmallStruct(RegionBindingsConstRef B,
+                                       const TypedValueRegion *R,
+                                       const RecordDecl *RD,
+                                       nonloc::LazyCompoundVal LCV) {
+  FieldVector Fields;
+
+  if (const CXXRecordDecl *Class = dyn_cast<CXXRecordDecl>(RD))
+    if (Class->getNumBases() != 0 || Class->getNumVBases() != 0)
+      return None;
+
+  for (RecordDecl::field_iterator I = RD->field_begin(), E = RD->field_end();
+       I != E; ++I) {
+    const FieldDecl *FD = *I;
+    if (FD->isUnnamedBitfield())
+      continue;
+
+    // If there are too many fields, or if any of the fields are aggregates,
+    // just use the LCV as a default binding.
+    if (Fields.size() == SmallStructLimit)
+      return None;
+
+    QualType Ty = FD->getType();
+    if (!(Ty->isScalarType() || Ty->isReferenceType()))
+      return None;
+
+    Fields.push_back(*I);
   }
+
+  RegionBindingsRef NewB = B;
+  
+  for (FieldVector::iterator I = Fields.begin(), E = Fields.end(); I != E; ++I){
+    const FieldRegion *SourceFR = MRMgr.getFieldRegion(*I, LCV.getRegion());
+    SVal V = getBindingForField(getRegionBindings(LCV.getStore()), SourceFR);
+
+    const FieldRegion *DestFR = MRMgr.getFieldRegion(*I, R);
+    NewB = bind(NewB, loc::MemRegionVal(DestFR), V);
+  }
+
   return NewB;
 }
 
@@ -2075,13 +2152,19 @@ RegionBindingsRef RegionStoreManager::bi
   assert(T->isStructureOrClassType());
 
   const RecordType* RT = T->getAs<RecordType>();
-  RecordDecl *RD = RT->getDecl();
+  const RecordDecl *RD = RT->getDecl();
 
   if (!RD->isCompleteDefinition())
     return B;
 
   // Handle lazy compound values and symbolic values.
-  if (V.getAs<nonloc::LazyCompoundVal>() || V.getAs<nonloc::SymbolVal>())
+  if (Optional<nonloc::LazyCompoundVal> LCV =
+        V.getAs<nonloc::LazyCompoundVal>()) {
+    if (Optional<RegionBindingsRef> NewB = tryBindSmallStruct(B, R, RD, *LCV))
+      return *NewB;
+    return bindAggregate(B, R, V);
+  }
+  if (V.getAs<nonloc::SymbolVal>())
     return bindAggregate(B, R, V);
 
   // We may get non-CompoundVal accidentally due to imprecise cast logic or
@@ -2113,7 +2196,7 @@ RegionBindingsRef RegionStoreManager::bi
     else if (FTy->isStructureOrClassType())
       NewB = bindStruct(NewB, FR, *VI);
     else
-      NewB = bind(NewB, svalBuilder.makeLoc(FR), *VI);
+      NewB = bind(NewB, loc::MemRegionVal(FR), *VI);
     ++VI;
   }
 

Modified: cfe/trunk/test/Analysis/analyzer-config.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/analyzer-config.c?rev=179767&r1=179766&r2=179767&view=diff
==============================================================================
--- cfe/trunk/test/Analysis/analyzer-config.c (original)
+++ cfe/trunk/test/Analysis/analyzer-config.c Thu Apr 18 11:33:46 2013
@@ -16,6 +16,7 @@ void foo() { bar(); }
 // CHECK-NEXT: max-nodes = 150000
 // CHECK-NEXT: max-times-inline-large = 32
 // CHECK-NEXT: mode = deep
+// CHECK-NEXT: region-store-small-struct-limit = 2
 // CHECK-NEXT: [stats]
-// CHECK-NEXT: num-entries = 11
+// CHECK-NEXT: num-entries = 12
 

Modified: cfe/trunk/test/Analysis/analyzer-config.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/analyzer-config.cpp?rev=179767&r1=179766&r2=179767&view=diff
==============================================================================
--- cfe/trunk/test/Analysis/analyzer-config.cpp (original)
+++ cfe/trunk/test/Analysis/analyzer-config.cpp Thu Apr 18 11:33:46 2013
@@ -26,5 +26,6 @@ public:
 // CHECK-NEXT: max-nodes = 150000
 // CHECK-NEXT: max-times-inline-large = 32
 // CHECK-NEXT: mode = deep
+// CHECK-NEXT: region-store-small-struct-limit = 2
 // CHECK-NEXT: [stats]
-// CHECK-NEXT: num-entries = 15
+// CHECK-NEXT: num-entries = 16

Modified: cfe/trunk/test/Analysis/uninit-vals.m
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Analysis/uninit-vals.m?rev=179767&r1=179766&r2=179767&view=diff
==============================================================================
--- cfe/trunk/test/Analysis/uninit-vals.m (original)
+++ cfe/trunk/test/Analysis/uninit-vals.m Thu Apr 18 11:33:46 2013
@@ -43,6 +43,7 @@ void PR10163 (void) {
 typedef struct {
   float x;
   float y;
+  float z;
 } Point;
 typedef struct {
   Point origin;
@@ -53,6 +54,7 @@ Point makePoint(float x, float y) {
   Point result;
   result.x = x;
   result.y = y;
+  result.z = 0.0;
   return result;
 }
 
@@ -85,6 +87,7 @@ void PR14765_argument(Circle *testObj) {
 typedef struct {
   int x;
   int y;
+  int z;
 } IntPoint;
 typedef struct {
   IntPoint origin;
@@ -95,6 +98,7 @@ IntPoint makeIntPoint(int x, int y) {
   IntPoint result;
   result.x = x;
   result.y = y;
+  result.z = 0;
   return result;
 }
 
@@ -104,6 +108,7 @@ void PR14765_test_int() {
   clang_analyzer_eval(testObj->size == 0); // expected-warning{{TRUE}}
   clang_analyzer_eval(testObj->origin.x == 0); // expected-warning{{TRUE}}
   clang_analyzer_eval(testObj->origin.y == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(testObj->origin.z == 0); // expected-warning{{TRUE}}
 
   testObj->origin = makeIntPoint(1, 2);
   if (testObj->size > 0) { ; } // warning occurs here
@@ -115,6 +120,7 @@ void PR14765_test_int() {
   clang_analyzer_eval(testObj->size == 0); // expected-warning{{UNKNOWN}}
   clang_analyzer_eval(testObj->origin.x == 1); // expected-warning{{TRUE}}
   clang_analyzer_eval(testObj->origin.y == 2); // expected-warning{{TRUE}}
+  clang_analyzer_eval(testObj->origin.z == 0); // expected-warning{{TRUE}}
 
   free(testObj);
 }
@@ -127,6 +133,7 @@ void PR14765_argument_int(IntCircle *tes
   clang_analyzer_eval(testObj->size == oldSize); // expected-warning{{TRUE}}
   clang_analyzer_eval(testObj->origin.x == 1); // expected-warning{{TRUE}}
   clang_analyzer_eval(testObj->origin.y == 2); // expected-warning{{TRUE}}
+  clang_analyzer_eval(testObj->origin.z == 0); // expected-warning{{TRUE}}
 }
 
 
@@ -141,3 +148,137 @@ void rdar13292559(Circle input) {
   useCircle(obj); // no-warning
 }
 
+
+typedef struct {
+  int x;
+  int y;
+} IntPoint2D;
+typedef struct {
+  IntPoint2D origin;
+  int size;
+} IntCircle2D;
+
+IntPoint2D makeIntPoint2D(int x, int y) {
+  IntPoint2D result;
+  result.x = x;
+  result.y = y;
+  return result;
+}
+
+void testSmallStructsCopiedPerField() {
+  IntPoint2D a;
+  a.x = 0;
+
+  IntPoint2D b = a;
+  extern void useInt(int);
+  useInt(b.x); // no-warning
+  useInt(b.y); // expected-warning{{uninitialized}}
+}
+
+void testLargeStructsNotCopiedPerField() {
+  IntPoint a;
+  a.x = 0;
+
+  IntPoint b = a;
+  extern void useInt(int);
+  useInt(b.x); // no-warning
+  useInt(b.y); // no-warning
+}
+
+void testSmallStructInLargerStruct() {
+  IntCircle2D *testObj = calloc(sizeof(IntCircle2D), 1);
+
+  clang_analyzer_eval(testObj->size == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(testObj->origin.x == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(testObj->origin.y == 0); // expected-warning{{TRUE}}
+
+  testObj->origin = makeIntPoint2D(1, 2);
+  if (testObj->size > 0) { ; } // warning occurs here
+
+  clang_analyzer_eval(testObj->size == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(testObj->origin.x == 1); // expected-warning{{TRUE}}
+  clang_analyzer_eval(testObj->origin.y == 2); // expected-warning{{TRUE}}
+
+  free(testObj);
+}
+
+void testCopySmallStructIntoArgument(IntCircle2D *testObj) {
+  int oldSize = testObj->size;
+  clang_analyzer_eval(testObj->size == oldSize); // expected-warning{{TRUE}}
+
+  testObj->origin = makeIntPoint2D(1, 2);
+  clang_analyzer_eval(testObj->size == oldSize); // expected-warning{{TRUE}}
+  clang_analyzer_eval(testObj->origin.x == 1); // expected-warning{{TRUE}}
+  clang_analyzer_eval(testObj->origin.y == 2); // expected-warning{{TRUE}}
+}
+
+void testSmallStructBitfields() {
+  struct {
+    int x : 4;
+    int y : 4;
+  } a, b;
+
+  a.x = 1;
+  a.y = 2;
+
+  b = a;
+  clang_analyzer_eval(b.x == 1); // expected-warning{{TRUE}}
+  clang_analyzer_eval(b.y == 2); // expected-warning{{TRUE}}
+}
+
+void testSmallStructBitfieldsFirstUndef() {
+  struct {
+    int x : 4;
+    int y : 4;
+  } a, b;
+
+  a.y = 2;
+
+  b = a;
+  clang_analyzer_eval(b.y == 2); // expected-warning{{TRUE}}
+  clang_analyzer_eval(b.x == 1); // expected-warning{{garbage}}
+}
+
+void testSmallStructBitfieldsSecondUndef() {
+  struct {
+    int x : 4;
+    int y : 4;
+  } a, b;
+
+  a.x = 1;
+
+  b = a;
+  clang_analyzer_eval(b.x == 1); // expected-warning{{TRUE}}
+  clang_analyzer_eval(b.y == 2); // expected-warning{{garbage}}
+}
+
+void testSmallStructBitfieldsFirstUnnamed() {
+  struct {
+    int : 4;
+    int y : 4;
+  } a, b, c;
+
+  a.y = 2;
+
+  b = a;
+  clang_analyzer_eval(b.y == 2); // expected-warning{{TRUE}}
+
+  b = c;
+  clang_analyzer_eval(b.y == 2); // expected-warning{{garbage}}
+}
+
+void testSmallStructBitfieldsSecondUnnamed() {
+  struct {
+    int x : 4;
+    int : 4;
+  } a, b, c;
+
+  a.x = 1;
+
+  b = a;
+  clang_analyzer_eval(b.x == 1); // expected-warning{{TRUE}}
+
+  b = c;
+  clang_analyzer_eval(b.x == 1); // expected-warning{{garbage}}
+}
+





More information about the cfe-commits mailing list