[clang] 1f57d76 - [analyzer] Refactor range inference for symbolic expressions

Valeriy Savchenko via cfe-commits cfe-commits at lists.llvm.org
Thu May 28 08:57:18 PDT 2020


Author: Valeriy Savchenko
Date: 2020-05-28T18:54:52+03:00
New Revision: 1f57d76a8dd00611aaa4b33048be195ea9a2dc44

URL: https://github.com/llvm/llvm-project/commit/1f57d76a8dd00611aaa4b33048be195ea9a2dc44
DIFF: https://github.com/llvm/llvm-project/commit/1f57d76a8dd00611aaa4b33048be195ea9a2dc44.diff

LOG: [analyzer] Refactor range inference for symbolic expressions

Summary:
This change introduces a new component to unite all of the reasoning
we have about operations on ranges in the analyzer's solver.
In many cases, we might conclude that the range for a symbolic operation
is much more narrow than the type implies.  While reasoning about
runtime conditions (especially in loops), we need to support more and
more of those little pieces of logic.  The new component mostly plays
a role of an organizer for those, and allows us to focus on the actual
reasoning about ranges and not dispatching manually on the types of the
nested symbolic expressions.

Differential Revision: https://reviews.llvm.org/D79232

Added: 
    clang/test/Analysis/double-ranges-bug.c

Modified: 
    clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
    clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
    clang/test/Analysis/constant-folding.c

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
index c72f8292647d..97c9c6d63eb2 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
@@ -30,6 +30,10 @@ class Range : public std::pair<const llvm::APSInt *, const llvm::APSInt *> {
       : std::pair<const llvm::APSInt *, const llvm::APSInt *>(&from, &to) {
     assert(from <= to);
   }
+
+  Range(const llvm::APSInt &point)
+      : std::pair<const llvm::APSInt *, const llvm::APSInt *>(&point, &point) {}
+
   bool Includes(const llvm::APSInt &v) const {
     return *first <= v && v <= *second;
   }
@@ -89,6 +93,9 @@ class RangeSet {
   RangeSet(Factory &F, const llvm::APSInt &from, const llvm::APSInt &to)
       : ranges(F.add(F.getEmptySet(), Range(from, to))) {}
 
+  /// Construct a new RangeSet representing the given point as a range.
+  RangeSet(Factory &F, const llvm::APSInt &point) : RangeSet(F, point, point) {}
+
   /// Profile - Generates a hash profile of this RangeSet for use
   ///  by FoldingSet.
   void Profile(llvm::FoldingSetNodeID &ID) const { ranges.Profile(ID); }

diff  --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
index a3ea7d4c013b..368324d3d34f 100644
--- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
@@ -16,6 +16,7 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/ImmutableSet.h"
 #include "llvm/Support/raw_ostream.h"
@@ -23,10 +24,16 @@
 using namespace clang;
 using namespace ento;
 
+//===----------------------------------------------------------------------===//
+//                           RangeSet implementation
+//===----------------------------------------------------------------------===//
+
 void RangeSet::IntersectInRange(BasicValueFactory &BV, Factory &F,
-                      const llvm::APSInt &Lower, const llvm::APSInt &Upper,
-                      PrimRangeSet &newRanges, PrimRangeSet::iterator &i,
-                      PrimRangeSet::iterator &e) const {
+                                const llvm::APSInt &Lower,
+                                const llvm::APSInt &Upper,
+                                PrimRangeSet &newRanges,
+                                PrimRangeSet::iterator &i,
+                                PrimRangeSet::iterator &e) const {
   // There are six cases for each range R in the set:
   //   1. R is entirely before the intersection range.
   //   2. R is entirely after the intersection range.
@@ -66,6 +73,11 @@ const llvm::APSInt &RangeSet::getMinValue() const {
 }
 
 bool RangeSet::pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const {
+  if (isEmpty()) {
+    // This range is already infeasible.
+    return false;
+  }
+
   // This function has nine cases, the cartesian product of range-testing
   // both the upper and lower bounds against the symbol's type.
   // Each case requires a 
diff erent pinning operation.
@@ -283,6 +295,207 @@ void RangeSet::print(raw_ostream &os) const {
 }
 
 namespace {
+
+/// A little component aggregating all of the reasoning we have about
+/// the ranges of symbolic expressions.
+///
+/// Even when we don't know the exact values of the operands, we still
+/// can get a pretty good estimate of the result's range.
+class SymbolicRangeInferrer
+    : public SymExprVisitor<SymbolicRangeInferrer, RangeSet> {
+public:
+  static RangeSet inferRange(BasicValueFactory &BV, RangeSet::Factory &F,
+                             ProgramStateRef State, SymbolRef Sym) {
+    SymbolicRangeInferrer Inferrer(BV, F, State);
+    return Inferrer.infer(Sym);
+  }
+
+  RangeSet VisitSymExpr(SymbolRef Sym) {
+    // If we got to this function, the actual type of the symbolic
+    // expression is not supported for advanced inference.
+    // In this case, we simply backoff to the default "let's simply
+    // infer the range from the expression's type".
+    return infer(Sym->getType());
+  }
+
+  RangeSet VisitSymIntExpr(const SymIntExpr *Sym) {
+    return VisitBinaryOperator(Sym);
+  }
+
+  RangeSet VisitIntSymExpr(const IntSymExpr *Sym) {
+    return VisitBinaryOperator(Sym);
+  }
+
+  RangeSet VisitSymSymExpr(const SymSymExpr *Sym) {
+    return VisitBinaryOperator(Sym);
+  }
+
+private:
+  SymbolicRangeInferrer(BasicValueFactory &BV, RangeSet::Factory &F,
+                        ProgramStateRef S)
+      : ValueFactory(BV), RangeFactory(F), State(S) {}
+
+  /// Infer range information from the given integer constant.
+  ///
+  /// It's not a real "inference", but is here for operating with
+  /// sub-expressions in a more polymorphic manner.
+  RangeSet inferAs(const llvm::APSInt &Val, QualType) {
+    return {RangeFactory, Val};
+  }
+
+  /// Infer range information from symbol in the context of the given type.
+  RangeSet inferAs(SymbolRef Sym, QualType DestType) {
+    QualType ActualType = Sym->getType();
+    // Check that we can reason about the symbol at all.
+    if (ActualType->isIntegralOrEnumerationType() ||
+        Loc::isLocType(ActualType)) {
+      return infer(Sym);
+    }
+    // Otherwise, let's simply infer from the destination type.
+    // We couldn't figure out nothing else about that expression.
+    return infer(DestType);
+  }
+
+  RangeSet infer(SymbolRef Sym) {
+    const RangeSet *AssociatedRange = State->get<ConstraintRange>(Sym);
+
+    // If Sym is a 
diff erence of symbols A - B, then maybe we have range set
+    // stored for B - A.
+    const RangeSet *RangeAssociatedWithNegatedSym =
+        getRangeForMinusSymbol(State, Sym);
+
+    // If we have range set stored for both A - B and B - A then calculate the
+    // effective range set by intersecting the range set for A - B and the
+    // negated range set of B - A.
+    if (AssociatedRange && RangeAssociatedWithNegatedSym)
+      return AssociatedRange->Intersect(
+          ValueFactory, RangeFactory,
+          RangeAssociatedWithNegatedSym->Negate(ValueFactory, RangeFactory));
+
+    if (AssociatedRange)
+      return *AssociatedRange;
+
+    if (RangeAssociatedWithNegatedSym)
+      return RangeAssociatedWithNegatedSym->Negate(ValueFactory, RangeFactory);
+
+    return Visit(Sym);
+  }
+
+  /// Infer range information solely from the type.
+  RangeSet infer(QualType T) {
+    // Lazily generate a new RangeSet representing all possible values for the
+    // given symbol type.
+    RangeSet Result(RangeFactory, ValueFactory.getMinValue(T),
+                    ValueFactory.getMaxValue(T));
+
+    // References are known to be non-zero.
+    if (T->isReferenceType())
+      return assumeNonZero(Result, T);
+
+    return Result;
+  }
+
+  template <class BinarySymExprTy>
+  RangeSet VisitBinaryOperator(const BinarySymExprTy *Sym) {
+    // TODO #1: VisitBinaryOperator implementation might not make a good
+    // use of the inferred ranges.  In this case, we might be calculating
+    // everything for nothing.  This being said, we should introduce some
+    // sort of laziness mechanism here.
+    //
+    // TODO #2: We didn't go into the nested expressions before, so it
+    // might cause us spending much more time doing the inference.
+    // This can be a problem for deeply nested expressions that are
+    // involved in conditions and get tested continuously.  We definitely
+    // need to address this issue and introduce some sort of caching
+    // in here.
+    QualType ResultType = Sym->getType();
+    return VisitBinaryOperator(inferAs(Sym->getLHS(), ResultType),
+                               Sym->getOpcode(),
+                               inferAs(Sym->getRHS(), ResultType), ResultType);
+  }
+
+  RangeSet VisitBinaryOperator(RangeSet LHS, BinaryOperator::Opcode Op,
+                               RangeSet RHS, QualType T) {
+    switch (Op) {
+    case BO_Or:
+      return VisitOrOperator(LHS, RHS, T);
+    case BO_And:
+      return VisitAndOperator(LHS, RHS, T);
+    default:
+      return infer(T);
+    }
+  }
+
+  RangeSet VisitOrOperator(RangeSet LHS, RangeSet RHS, QualType T) {
+    // TODO: generalize for the ranged RHS.
+    if (const llvm::APSInt *RHSConstant = RHS.getConcreteValue()) {
+      // For unsigned types, the output is greater-or-equal than RHS.
+      if (T->isUnsignedIntegerType()) {
+        return LHS.Intersect(ValueFactory, RangeFactory, *RHSConstant,
+                             ValueFactory.getMaxValue(T));
+      }
+
+      // Bitwise-or with a non-zero constant is always non-zero.
+      const llvm::APSInt &Zero = ValueFactory.getAPSIntType(T).getZeroValue();
+      if (*RHSConstant != Zero) {
+        return assumeNonZero(LHS, T);
+      }
+    }
+    return infer(T);
+  }
+
+  RangeSet VisitAndOperator(RangeSet LHS, RangeSet RHS, QualType T) {
+    // TODO: generalize for the ranged RHS.
+    if (const llvm::APSInt *RHSConstant = RHS.getConcreteValue()) {
+      const llvm::APSInt &Zero = ValueFactory.getAPSIntType(T).getZeroValue();
+
+      // For unsigned types, or positive RHS,
+      // bitwise-and output is always smaller-or-equal than RHS (assuming two's
+      // complement representation of signed types).
+      if (T->isUnsignedIntegerType() || *RHSConstant >= Zero) {
+        return LHS.Intersect(ValueFactory, RangeFactory,
+                             ValueFactory.getMinValue(T), *RHSConstant);
+      }
+    }
+    return infer(T);
+  }
+
+  /// Return a range set subtracting zero from \p Domain.
+  RangeSet assumeNonZero(RangeSet Domain, QualType T) {
+    APSIntType IntType = ValueFactory.getAPSIntType(T);
+    return Domain.Intersect(ValueFactory, RangeFactory,
+                            ++IntType.getZeroValue(), --IntType.getZeroValue());
+  }
+
+  // FIXME: Once SValBuilder supports unary minus, we should use SValBuilder to
+  //        obtain the negated symbolic expression instead of constructing the
+  //        symbol manually. This will allow us to support finding ranges of not
+  //        only negated SymSymExpr-type expressions, but also of other, simpler
+  //        expressions which we currently do not know how to negate.
+  const RangeSet *getRangeForMinusSymbol(ProgramStateRef State, SymbolRef Sym) {
+    if (const SymSymExpr *SSE = dyn_cast<SymSymExpr>(Sym)) {
+      if (SSE->getOpcode() == BO_Sub) {
+        QualType T = Sym->getType();
+        SymbolManager &SymMgr = State->getSymbolManager();
+        SymbolRef negSym =
+            SymMgr.getSymSymExpr(SSE->getRHS(), BO_Sub, SSE->getLHS(), T);
+
+        if (const RangeSet *negV = State->get<ConstraintRange>(negSym)) {
+          // Unsigned range set cannot be negated, unless it is [0, 0].
+          if (T->isUnsignedIntegerOrEnumerationType() ||
+              T->isSignedIntegerOrEnumerationType())
+            return negV;
+        }
+      }
+    }
+    return nullptr;
+  }
+
+  BasicValueFactory &ValueFactory;
+  RangeSet::Factory &RangeFactory;
+  ProgramStateRef State;
+};
+
 class RangeConstraintManager : public RangedConstraintManager {
 public:
   RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB)
@@ -350,8 +563,7 @@ class RangeConstraintManager : public RangedConstraintManager {
   RangeSet::Factory F;
 
   RangeSet getRange(ProgramStateRef State, SymbolRef Sym);
-  const RangeSet* getRangeForMinusSymbol(ProgramStateRef State,
-                                         SymbolRef Sym);
+  const RangeSet *getRangeForMinusSymbol(ProgramStateRef State, SymbolRef Sym);
 
   RangeSet getSymLTRange(ProgramStateRef St, SymbolRef Sym,
                          const llvm::APSInt &Int,
@@ -368,7 +580,6 @@ class RangeConstraintManager : public RangedConstraintManager {
   RangeSet getSymGERange(ProgramStateRef St, SymbolRef Sym,
                          const llvm::APSInt &Int,
                          const llvm::APSInt &Adjustment);
-
 };
 
 } // end anonymous namespace
@@ -475,87 +686,9 @@ RangeConstraintManager::removeDeadBindings(ProgramStateRef State,
   return Changed ? State->set<ConstraintRange>(CR) : State;
 }
 
-/// Return a range set subtracting zero from \p Domain.
-static RangeSet assumeNonZero(
-    BasicValueFactory &BV,
-    RangeSet::Factory &F,
-    SymbolRef Sym,
-    RangeSet Domain) {
-  APSIntType IntType = BV.getAPSIntType(Sym->getType());
-  return Domain.Intersect(BV, F, ++IntType.getZeroValue(),
-      --IntType.getZeroValue());
-}
-
-/// Apply implicit constraints for bitwise OR- and AND-.
-/// For unsigned types, bitwise OR with a constant always returns
-/// a value greater-or-equal than the constant, and bitwise AND
-/// returns a value less-or-equal then the constant.
-///
-/// Pattern matches the expression \p Sym against those rule,
-/// and applies the required constraints.
-/// \p Input Previously established expression range set
-static RangeSet applyBitwiseConstraints(
-    BasicValueFactory &BV,
-    RangeSet::Factory &F,
-    RangeSet Input,
-    const SymIntExpr* SIE) {
-  QualType T = SIE->getType();
-  bool IsUnsigned = T->isUnsignedIntegerType();
-  const llvm::APSInt &RHS = SIE->getRHS();
-  const llvm::APSInt &Zero = BV.getAPSIntType(T).getZeroValue();
-  BinaryOperator::Opcode Operator = SIE->getOpcode();
-
-  // For unsigned types, the output of bitwise-or is bigger-or-equal than RHS.
-  if (Operator == BO_Or && IsUnsigned)
-    return Input.Intersect(BV, F, RHS, BV.getMaxValue(T));
-
-  // Bitwise-or with a non-zero constant is always non-zero.
-  if (Operator == BO_Or && RHS != Zero)
-    return assumeNonZero(BV, F, SIE, Input);
-
-  // For unsigned types, or positive RHS,
-  // bitwise-and output is always smaller-or-equal than RHS (assuming two's
-  // complement representation of signed types).
-  if (Operator == BO_And && (IsUnsigned || RHS >= Zero))
-    return Input.Intersect(BV, F, BV.getMinValue(T), RHS);
-
-  return Input;
-}
-
 RangeSet RangeConstraintManager::getRange(ProgramStateRef State,
                                           SymbolRef Sym) {
-  ConstraintRangeTy::data_type *V = State->get<ConstraintRange>(Sym);
-
-  // If Sym is a 
diff erence of symbols A - B, then maybe we have range set
-  // stored for B - A.
-  BasicValueFactory &BV = getBasicVals();
-  const RangeSet *R = getRangeForMinusSymbol(State, Sym);
-
-  // If we have range set stored for both A - B and B - A then calculate the
-  // effective range set by intersecting the range set for A - B and the
-  // negated range set of B - A.
-  if (V && R)
-    return V->Intersect(BV, F, R->Negate(BV, F));
-  if (V)
-    return *V;
-  if (R)
-    return R->Negate(BV, F);
-
-  // Lazily generate a new RangeSet representing all possible values for the
-  // given symbol type.
-  QualType T = Sym->getType();
-
-  RangeSet Result(F, BV.getMinValue(T), BV.getMaxValue(T));
-
-  // References are known to be non-zero.
-  if (T->isReferenceType())
-    return assumeNonZero(BV, F, Sym, Result);
-
-  // Known constraints on ranges of bitwise expressions.
-  if (const SymIntExpr* SIE = dyn_cast<SymIntExpr>(Sym))
-    return applyBitwiseConstraints(BV, F, Result, SIE);
-
-  return Result;
+  return SymbolicRangeInferrer::inferRange(getBasicVals(), F, State, Sym);
 }
 
 // FIXME: Once SValBuilder supports unary minus, we should use SValBuilder to

diff  --git a/clang/test/Analysis/constant-folding.c b/clang/test/Analysis/constant-folding.c
index fc04e608f7ef..5429f3c740e1 100644
--- a/clang/test/Analysis/constant-folding.c
+++ b/clang/test/Analysis/constant-folding.c
@@ -115,7 +115,22 @@ void testBitwiseRules(unsigned int a, int b) {
 #endif
 
   // Check that dynamically computed constants also work.
-  int constant = 1 << 3;
+  unsigned int constant = 1 << 3;
   unsigned int d = a | constant;
-  clang_analyzer_eval(constant > 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(d >= constant); // expected-warning{{TRUE}}
+
+  // Check that nested expressions also work.
+  clang_analyzer_eval(((a | 10) | 5) >= 10); // expected-warning{{TRUE}}
+
+  // TODO: We misuse intersection of ranges for bitwise AND and OR operators.
+  //       Resulting ranges for the following cases are infeasible.
+  //       This is what causes paradoxical results below.
+  if (a > 10) {
+    clang_analyzer_eval((a & 1) <= 1); // expected-warning{{FALSE}}
+    clang_analyzer_eval((a & 1) > 1);  // expected-warning{{FALSE}}
+  }
+  if (a < 10) {
+    clang_analyzer_eval((a | 20) >= 20); // expected-warning{{FALSE}}
+    clang_analyzer_eval((a | 20) < 20);  // expected-warning{{FALSE}}
+  }
 }

diff  --git a/clang/test/Analysis/double-ranges-bug.c b/clang/test/Analysis/double-ranges-bug.c
new file mode 100644
index 000000000000..aa3dde293034
--- /dev/null
+++ b/clang/test/Analysis/double-ranges-bug.c
@@ -0,0 +1,22 @@
+// RUN: %clang_analyze_cc1 -verify %s -analyzer-checker=core
+
+// expected-no-diagnostics
+
+typedef unsigned long int A;
+
+extern int fill(A **values, int *nvalues);
+
+void foo() {
+  A *values;
+  int nvalues;
+  fill(&values, &nvalues);
+
+  int i = 1;
+  double x, y;
+
+  y = values[i - 1];
+  x = values[i];
+
+  if (x <= y) {
+  }
+}


        


More information about the cfe-commits mailing list