[clang] 8ddbb44 - [Analyzer][solver] Simplify existing eq classes and constraints when a new constraint is added

Gabor Marton via cfe-commits cfe-commits at lists.llvm.org
Mon Jun 14 03:19:32 PDT 2021


Author: Gabor Marton
Date: 2021-06-14T12:19:09+02:00
New Revision: 8ddbb442b6e87efc9c6599280740c6f4fc40963d

URL: https://github.com/llvm/llvm-project/commit/8ddbb442b6e87efc9c6599280740c6f4fc40963d
DIFF: https://github.com/llvm/llvm-project/commit/8ddbb442b6e87efc9c6599280740c6f4fc40963d.diff

LOG: [Analyzer][solver] Simplify existing eq classes and constraints when a new constraint is added

Update `setConstraint` to simplify existing equivalence classes when a
new constraint is added. In this patch we iterate over all existing
equivalence classes and constraints and try to simplfy them with
simplifySVal. This solves problematic cases where we have two symbols in
the tree, e.g.:
```
int test_rhs_further_constrained(int x, int y) {
  if (x + y != 0)
    return 0;
  if (y != 0)
    return 0;
  clang_analyzer_eval(x + y == 0); // expected-warning{{TRUE}}
  clang_analyzer_eval(y == 0);     // expected-warning{{TRUE}}
  return 0;
}
```

Differential Revision: https://reviews.llvm.org/D103314

Added: 
    clang/test/Analysis/find-binop-constraints.cpp

Modified: 
    clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
    clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
index 4a118074463d..a4957c697c0a 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
@@ -256,7 +256,7 @@ class RangeSet {
   ///  by FoldingSet.
   void Profile(llvm::FoldingSetNodeID &ID) const { Profile(ID, *this); }
 
-  /// getConcreteValue - If a symbol is contrained to equal a specific integer
+  /// getConcreteValue - If a symbol is constrained to equal a specific integer
   ///  constant then this method returns that value.  Otherwise, it returns
   ///  NULL.
   const llvm::APSInt *getConcreteValue() const {

diff  --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
index 1088565d1a23..ac7767f0d3c4 100644
--- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
@@ -21,6 +21,7 @@
 #include "llvm/ADT/ImmutableSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
@@ -582,9 +583,17 @@ class EquivalenceClass : public llvm::FoldingSetNode {
   LLVM_NODISCARD inline ClassSet
   getDisequalClasses(DisequalityMapTy Map, ClassSet::Factory &Factory) const;
 
+  LLVM_NODISCARD static inline Optional<bool> areEqual(ProgramStateRef State,
+                                                       EquivalenceClass First,
+                                                       EquivalenceClass Second);
   LLVM_NODISCARD static inline Optional<bool>
   areEqual(ProgramStateRef State, SymbolRef First, SymbolRef Second);
 
+  /// Iterate over all symbols and try to simplify them.
+  LLVM_NODISCARD ProgramStateRef simplify(SValBuilder &SVB,
+                                          RangeSet::Factory &F,
+                                          ProgramStateRef State);
+
   /// Check equivalence data for consistency.
   LLVM_NODISCARD LLVM_ATTRIBUTE_UNUSED static bool
   isClassDataConsistent(ProgramStateRef State);
@@ -1375,6 +1384,12 @@ RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Rem>(Range LHS,
 //                  Constraint manager implementation details
 //===----------------------------------------------------------------------===//
 
+static SymbolRef simplify(ProgramStateRef State, SymbolRef Sym) {
+  SValBuilder &SVB = State->getStateManager().getSValBuilder();
+  SVal SimplifiedVal = SVB.simplifySVal(State, SVB.makeSymbolVal(Sym));
+  return SimplifiedVal.getAsSymbol();
+}
+
 class RangeConstraintManager : public RangedConstraintManager {
 public:
   RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB)
@@ -1488,6 +1503,9 @@ class RangeConstraintManager : public RangedConstraintManager {
       // This is an infeasible assumption.
       return nullptr;
 
+    if (SymbolRef SimplifiedSym = simplify(State, Sym))
+      Sym = SimplifiedSym;
+
     if (ProgramStateRef NewState = setConstraint(State, Sym, NewConstraint)) {
       if (auto Equality = EqualityInfo::extract(Sym, Int, Adjustment)) {
         // If the original assumption is not Sym + Adjustment !=/</> Int,
@@ -1554,9 +1572,47 @@ class RangeConstraintManager : public RangedConstraintManager {
     return State->set<ConstraintRange>(Constraints);
   }
 
+  // Associate a constraint to a symbolic expression. First, we set the
+  // constraint in the State, then we try to simplify existing symbolic
+  // expressions based on the newly set constraint.
   LLVM_NODISCARD inline ProgramStateRef
   setConstraint(ProgramStateRef State, SymbolRef Sym, RangeSet Constraint) {
-    return setConstraint(State, EquivalenceClass::find(State, Sym), Constraint);
+    assert(State);
+
+    State = setConstraint(State, EquivalenceClass::find(State, Sym), Constraint);
+    if (!State)
+      return nullptr;
+
+    // We have a chance to simplify existing symbolic values if the new
+    // constraint is a constant.
+    if (!Constraint.getConcreteValue())
+      return State;
+
+    llvm::SmallSet<EquivalenceClass, 4> SimplifiedClasses;
+    // Iterate over all equivalence classes and try to simplify them.
+    ClassMembersTy Members = State->get<ClassMembers>();
+    for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members) {
+      EquivalenceClass Class = ClassToSymbolSet.first;
+      State = Class.simplify(getSValBuilder(), F, State);
+      if (!State)
+        return nullptr;
+      SimplifiedClasses.insert(Class);
+    }
+
+    // Trivial equivalence classes (those that have only one symbol member) are
+    // not stored in the State. Thus, we must skim through the constraints as
+    // well. And we try to simplify symbols in the constraints.
+    ConstraintRangeTy Constraints = State->get<ConstraintRange>();
+    for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) {
+      EquivalenceClass Class = ClassConstraint.first;
+      if (SimplifiedClasses.count(Class)) // Already simplified.
+        continue;
+      State = Class.simplify(getSValBuilder(), F, State);
+      if (!State)
+        return nullptr;
+    }
+
+    return State;
   }
 };
 
@@ -1592,6 +1648,8 @@ ConstraintMap ento::getConstraintMap(ProgramStateRef State) {
 
 inline EquivalenceClass EquivalenceClass::find(ProgramStateRef State,
                                                SymbolRef Sym) {
+  assert(State && "State should not be null");
+  assert(Sym && "Symbol should not be null");
   // We store far from all Symbol -> Class mappings
   if (const EquivalenceClass *NontrivialClass = State->get<ClassMap>(Sym))
     return *NontrivialClass;
@@ -1723,6 +1781,11 @@ EquivalenceClass::mergeImpl(BasicValueFactory &ValueFactory,
 
   // 4. Update disequality relations
   ClassSet DisequalToOther = Other.getDisequalClasses(DisequalityInfo, CF);
+  // We are about to merge two classes but they are already known to be
+  // non-equal. This is a contradiction.
+  if (DisequalToOther.contains(*this))
+    return nullptr;
+
   if (!DisequalToOther.isEmpty()) {
     ClassSet DisequalToThis = getDisequalClasses(DisequalityInfo, CF);
     DisequalityInfo = DF.remove(DisequalityInfo, Other);
@@ -1869,9 +1932,13 @@ inline bool EquivalenceClass::addToDisequalityInfo(
 inline Optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
                                                  SymbolRef FirstSym,
                                                  SymbolRef SecondSym) {
-  EquivalenceClass First = find(State, FirstSym);
-  EquivalenceClass Second = find(State, SecondSym);
+  return EquivalenceClass::areEqual(State, find(State, FirstSym),
+                                    find(State, SecondSym));
+}
 
+inline Optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
+                                                 EquivalenceClass First,
+                                                 EquivalenceClass Second) {
   // The same equivalence class => symbols are equal.
   if (First == Second)
     return true;
@@ -1886,6 +1953,30 @@ inline Optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
   return llvm::None;
 }
 
+// Iterate over all symbols and try to simplify them. Once a symbol is
+// simplified then we check if we can merge the simplified symbol's equivalence
+// class to this class. This way, we simplify not just the symbols but the
+// classes as well: we strive to keep the number of the classes to be the
+// absolute minimum.
+LLVM_NODISCARD ProgramStateRef EquivalenceClass::simplify(
+    SValBuilder &SVB, RangeSet::Factory &F, ProgramStateRef State) {
+  SymbolSet ClassMembers = getClassMembers(State);
+  for (const SymbolRef &MemberSym : ClassMembers) {
+    SymbolRef SimplifiedMemberSym = ::simplify(State, MemberSym);
+    if (SimplifiedMemberSym && MemberSym != SimplifiedMemberSym) {
+      EquivalenceClass ClassOfSimplifiedSym =
+          EquivalenceClass::find(State, SimplifiedMemberSym);
+      // The simplified symbol should be the member of the original Class,
+      // however, it might be in another existing class at the moment. We
+      // have to merge these classes.
+      State = merge(SVB.getBasicValueFactory(), F, State, ClassOfSimplifiedSym);
+      if (!State)
+        return nullptr;
+    }
+  }
+  return State;
+}
+
 inline ClassSet EquivalenceClass::getDisequalClasses(ProgramStateRef State,
                                                      SymbolRef Sym) {
   return find(State, Sym).getDisequalClasses(State);

diff  --git a/clang/test/Analysis/find-binop-constraints.cpp b/clang/test/Analysis/find-binop-constraints.cpp
new file mode 100644
index 000000000000..be387a6fb360
--- /dev/null
+++ b/clang/test/Analysis/find-binop-constraints.cpp
@@ -0,0 +1,163 @@
+// RUN: %clang_analyze_cc1 %s \
+// RUN:   -analyzer-checker=core \
+// RUN:   -analyzer-checker=debug.ExprInspection \
+// RUN:   -analyzer-config eagerly-assume=false \
+// RUN:   -verify
+
+void clang_analyzer_eval(bool);
+void clang_analyzer_warnIfReached();
+
+int test_legacy_behavior(int x, int y) {
+  if (y != 0)
+    return 0;
+  if (x + y != 0)
+    return 0;
+  clang_analyzer_eval(x + y == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(y == 0);     // expected-warning{{TRUE}}
+  return y / (x + y);              // expected-warning{{Division by zero}}
+}
+
+int test_rhs_further_constrained(int x, int y) {
+  if (x + y != 0)
+    return 0;
+  if (y != 0)
+    return 0;
+  clang_analyzer_eval(x + y == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(y == 0);     // expected-warning{{TRUE}}
+  return 0;
+}
+
+int test_lhs_further_constrained(int x, int y) {
+  if (x + y != 0)
+    return 0;
+  if (x != 0)
+    return 0;
+  clang_analyzer_eval(x + y == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(x == 0);     // expected-warning{{TRUE}}
+  return 0;
+}
+
+int test_lhs_and_rhs_further_constrained(int x, int y) {
+  if (x % y != 1)
+    return 0;
+  if (x != 1)
+    return 0;
+  if (y != 2)
+    return 0;
+  clang_analyzer_eval(x % y == 1); // expected-warning{{TRUE}}
+  clang_analyzer_eval(y == 2);     // expected-warning{{TRUE}}
+  return 0;
+}
+
+int test_commutativity(int x, int y) {
+  if (x + y != 0)
+    return 0;
+  if (y != 0)
+    return 0;
+  clang_analyzer_eval(y + x == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(y == 0);     // expected-warning{{TRUE}}
+  return 0;
+}
+
+int test_binop_when_height_is_2_r(int a, int x, int y, int z) {
+  switch (a) {
+  case 1: {
+    if (x + y + z != 0)
+      return 0;
+    if (z != 0)
+      return 0;
+    clang_analyzer_eval(x + y + z == 0); // expected-warning{{TRUE}}
+    clang_analyzer_eval(z == 0);         // expected-warning{{TRUE}}
+    break;
+  }
+  case 2: {
+    if (x + y + z != 0)
+      return 0;
+    if (y != 0)
+      return 0;
+    clang_analyzer_eval(x + y + z == 0); // expected-warning{{TRUE}}
+    clang_analyzer_eval(y == 0);         // expected-warning{{TRUE}}
+    break;
+  }
+  case 3: {
+    if (x + y + z != 0)
+      return 0;
+    if (x != 0)
+      return 0;
+    clang_analyzer_eval(x + y + z == 0); // expected-warning{{TRUE}}
+    clang_analyzer_eval(x == 0);         // expected-warning{{TRUE}}
+    break;
+  }
+  case 4: {
+    if (x + y + z != 0)
+      return 0;
+    if (x + y != 0)
+      return 0;
+    clang_analyzer_eval(x + y + z == 0); // expected-warning{{TRUE}}
+    clang_analyzer_eval(x + y == 0);     // expected-warning{{TRUE}}
+    break;
+  }
+  case 5: {
+    if (z != 0)
+      return 0;
+    if (x + y + z != 0)
+      return 0;
+    clang_analyzer_eval(x + y + z == 0); // expected-warning{{TRUE}}
+    if (y != 0)
+      return 0;
+    clang_analyzer_eval(y == 0);         // expected-warning{{TRUE}}
+    clang_analyzer_eval(z == 0);         // expected-warning{{TRUE}}
+    clang_analyzer_eval(x + y + z == 0); // expected-warning{{TRUE}}
+    break;
+  }
+
+  }
+  return 0;
+}
+
+void test_equivalence_classes_are_updated(int a, int b, int c, int d) {
+  if (a + b != c)
+    return;
+  if (a != d)
+    return;
+  if (b != 0)
+    return;
+  clang_analyzer_eval(c == d); // expected-warning{{TRUE}}
+  // Keep the symbols and the constraints! alive.
+  (void)(a * b * c * d);
+  return;
+}
+
+void test_contradiction(int a, int b, int c, int d) {
+  if (a + b != c)
+    return;
+  if (a == c)
+    return;
+  clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}}
+
+  // Bring in the contradiction.
+  if (b != 0)
+    return;
+  clang_analyzer_warnIfReached(); // no-warning, i.e. UNREACHABLE
+  // Keep the symbols and the constraints! alive.
+  (void)(a * b * c * d);
+  return;
+}
+
+void test_deferred_contradiction(int e0, int b0, int b1) {
+
+  int e1 = e0 - b0; // e1 is bound to (reg_$0<int e0>) - (reg_$1<int b0>)
+  (void)(b0 == 2);  // bifurcate
+
+  int e2 = e1 - b1;
+  if (e2 > 0) { // b1 != e1
+    clang_analyzer_warnIfReached();   // expected-warning{{REACHABLE}}
+    // Here, e1 is still bound to (reg_$0<int e0>) - (reg_$1<int b0>) but we
+    // should be able to simplify it to (reg_$0<int e0>) - 2 and thus realize
+    // the contradiction.
+    if (b1 == e1) {
+      clang_analyzer_warnIfReached(); // no-warning, i.e. UNREACHABLE
+      (void)(b0 * b1 * e0 * e1 * e2);
+    }
+  }
+}


        


More information about the cfe-commits mailing list