[clang] 47c4b8b - [analyzer] Generalize bitwise OR rules for ranges

Thu May 28 08:57:21 PDT 2020

Author: Valeriy Savchenko
Date: 2020-05-28T18:55:22+03:00
New Revision: 47c4b8bd68698b1827f39c3056783ed042faf718

URL: https://github.com/llvm/llvm-project/commit/47c4b8bd68698b1827f39c3056783ed042faf718
DIFF: https://github.com/llvm/llvm-project/commit/47c4b8bd68698b1827f39c3056783ed042faf718.diff

LOG: [analyzer] Generalize bitwise OR rules for ranges

Summary:
Previously the current solver started reasoning about bitwise OR
expressions only when one of the operands is a constant.  However,
very similar logic could be applied to ranges.  This commit addresses
this shortcoming.  Additionally, it refines how we deal with negative
operands.

Differential Revision: https://reviews.llvm.org/D79336

Added: 
    

Modified: 
    clang/include/clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h
    clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
    clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
    clang/test/Analysis/constant-folding.c

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h
index ac218bc070e9..a001c0dc7030 100644

--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/BasicValueFactory.h
@@ -157,6 +157,10 @@ class BasicValueFactory {
 
   const llvm::APSInt &Convert(QualType T, const llvm::APSInt &From) {
     APSIntType TargetType = getAPSIntType(T);
+    return Convert(TargetType, From);
+  }
+
+  const llvm::APSInt &Convert(APSIntType TargetType, const llvm::APSInt &From) {
     if (TargetType == APSIntType(From))
       return From;
 
@@ -177,11 +181,19 @@ class BasicValueFactory {
   }
 
   const llvm::APSInt &getMaxValue(QualType T) {
-    return getValue(getAPSIntType(T).getMaxValue());
+    return getMaxValue(getAPSIntType(T));
   }
 
   const llvm::APSInt &getMinValue(QualType T) {
-    return getValue(getAPSIntType(T).getMinValue());
+    return getMinValue(getAPSIntType(T));
+  }
+
+  const llvm::APSInt &getMaxValue(APSIntType T) {
+    return getValue(T.getMaxValue());
+  }
+
+  const llvm::APSInt &getMinValue(APSIntType T) {
+    return getValue(T.getMinValue());
   }
 
   const llvm::APSInt &Add1(const llvm::APSInt &V) {

diff  --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
index 97c9c6d63eb2..a42eebd7d4e8 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h
@@ -107,14 +107,17 @@ class RangeSet {
     return ranges.isSingleton() ? ranges.begin()->getConcreteValue() : nullptr;
   }
 
+  /// Get a minimal value covered by the ranges in the set
+  const llvm::APSInt &getMinValue() const;
+  /// Get a maximal value covered by the ranges in the set
+  const llvm::APSInt &getMaxValue() const;
+
 private:
   void IntersectInRange(BasicValueFactory &BV, Factory &F,
                         const llvm::APSInt &Lower, const llvm::APSInt &Upper,
                         PrimRangeSet &newRanges, PrimRangeSet::iterator &i,
                         PrimRangeSet::iterator &e) const;
 
-  const llvm::APSInt &getMinValue() const;
-
   bool pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const;
 
 public:
@@ -131,7 +134,6 @@ class RangeSet {
   }
 };
 
-
 class ConstraintRange {};
 using ConstraintRangeTy = llvm::ImmutableMap<SymbolRef, RangeSet>;
 

diff  --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
index 368324d3d34f..570161543805 100644
--- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
@@ -69,7 +69,19 @@ void RangeSet::IntersectInRange(BasicValueFactory &BV, Factory &F,
 
 const llvm::APSInt &RangeSet::getMinValue() const {
   assert(!isEmpty());
-  return ranges.begin()->From();
+  return begin()->From();
+}
+
+const llvm::APSInt &RangeSet::getMaxValue() const {
+  assert(!isEmpty());
+  // NOTE: It's a shame that we can't implement 'getMaxValue' without scanning
+  //       the whole tree to get to the last element.
+  //       llvm::ImmutableSet should support decrement for 'end' iterators
+  //       or reverse order iteration.
+  auto It = begin();
+  for (auto End = end(); std::next(It) != End; ++It) {
+  }
+  return It->To();
 }
 
 bool RangeSet::pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const {
@@ -426,22 +438,106 @@ class SymbolicRangeInferrer
     }
   }
 
+  //===----------------------------------------------------------------------===//
+  //                         Ranges and operators
+  //===----------------------------------------------------------------------===//
+
+  /// Return a rough approximation of the given range set.
+  ///
+  /// For the range set:
+  ///   { [x_0, y_0], [x_1, y_1], ... , [x_N, y_N] }
+  /// it will return the range [x_0, y_N].
+  static Range fillGaps(RangeSet Origin) {
+    assert(!Origin.isEmpty());
+    return {Origin.getMinValue(), Origin.getMaxValue()};
+  }
+
+  /// Try to convert given range into the given type.
+  ///
+  /// It will return llvm::None only when the trivial conversion is possible.
+  llvm::Optional<Range> convert(const Range &Origin, APSIntType To) {
+    if (To.testInRange(Origin.From(), false) != APSIntType::RTR_Within ||
+        To.testInRange(Origin.To(), false) != APSIntType::RTR_Within) {
+      return llvm::None;
+    }
+    return Range(ValueFactory.Convert(To, Origin.From()),
+                 ValueFactory.Convert(To, Origin.To()));
+  }
+
   RangeSet VisitOrOperator(RangeSet LHS, RangeSet RHS, QualType T) {
-    // TODO: generalize for the ranged RHS.
-    if (const llvm::APSInt *RHSConstant = RHS.getConcreteValue()) {
-      // For unsigned types, the output is greater-or-equal than RHS.
-      if (T->isUnsignedIntegerType()) {
-        return LHS.Intersect(ValueFactory, RangeFactory, *RHSConstant,
-                             ValueFactory.getMaxValue(T));
-      }
+    // We should propagate information about unfeasbility of one of the
+    // operands to the resulting range.
+    if (LHS.isEmpty() || RHS.isEmpty()) {
+      return RangeFactory.getEmptySet();
+    }
 
-      // Bitwise-or with a non-zero constant is always non-zero.
-      const llvm::APSInt &Zero = ValueFactory.getAPSIntType(T).getZeroValue();
-      if (*RHSConstant != Zero) {
-        return assumeNonZero(LHS, T);
-      }
+    APSIntType ResultType = ValueFactory.getAPSIntType(T);
+    RangeSet DefaultRange = infer(T);
+
+    Range CoarseLHS = fillGaps(LHS);
+    Range CoarseRHS = fillGaps(RHS);
+
+    // We need to convert ranges to the resulting type, so we can compare values
+    // and combine them in a meaningful (in terms of the given operation) way.
+    auto ConvertedCoarseLHS = convert(CoarseLHS, ResultType);
+    auto ConvertedCoarseRHS = convert(CoarseRHS, ResultType);
+
+    // It is hard to reason about ranges when conversion changes
+    // borders of the ranges.
+    if (!ConvertedCoarseLHS || !ConvertedCoarseRHS) {
+      return DefaultRange;
     }
-    return infer(T);
+
+    llvm::APSInt Zero = ResultType.getZeroValue();
+
+    bool IsLHSPositiveOrZero = ConvertedCoarseLHS->From() >= Zero;
+    bool IsRHSPositiveOrZero = ConvertedCoarseRHS->From() >= Zero;
+
+    bool IsLHSNegative = ConvertedCoarseLHS->To() < Zero;
+    bool IsRHSNegative = ConvertedCoarseRHS->To() < Zero;
+
+    // Check if both ranges have the same sign.
+    if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) ||
+        (IsLHSNegative && IsRHSNegative)) {
+      // The result is definitely greater or equal than any of the operands.
+      const llvm::APSInt &Min =
+          std::max(ConvertedCoarseLHS->From(), ConvertedCoarseRHS->From());
+
+      // We estimate maximal value for positives as the maximal value for the
+      // given type.  For negatives, we estimate it with -1 (e.g. 0x11111111).
+      //
+      // TODO: We basically, limit the resulting range from below (in absolute
+      //       numbers), but don't do anything with the upper bound.
+      //       For positive operands, it can be done as follows: for the upper
+      //       bound of LHS and RHS we calculate the most significant bit set.
+      //       Let's call it the N-th bit.  Then we can estimate the maximal
+      //       number to be 2^(N+1)-1, i.e. the number with all the bits up to
+      //       the N-th bit set.
+      const llvm::APSInt &Max = IsLHSNegative
+                                    ? ValueFactory.getValue(--Zero)
+                                    : ValueFactory.getMaxValue(ResultType);
+
+      return {RangeFactory, ValueFactory.getValue(Min), Max};
+    }
+
+    // Otherwise, let's check if at least one of the operands is negative.
+    if (IsLHSNegative || IsRHSNegative) {
+      // This means that the result is definitely negative as well.
+      return {RangeFactory, ValueFactory.getMinValue(ResultType),
+              ValueFactory.getValue(--Zero)};
+    }
+
+    // It is pretty hard to reason about operands with 
diff erent signs
+    // (and especially with possibly 
diff erent signs).  We simply check if it
+    // can be zero.  In order to conclude that the result could not be zero,
+    // at least one of the operands should be definitely not zero itself.
+    if (!ConvertedCoarseLHS->Includes(Zero) ||
+        !ConvertedCoarseRHS->Includes(Zero)) {
+      return assumeNonZero(DefaultRange, T);
+    }
+
+    // Nothing much else to do here.
+    return DefaultRange;
   }
 
   RangeSet VisitAndOperator(RangeSet LHS, RangeSet RHS, QualType T) {

diff  --git a/clang/test/Analysis/constant-folding.c b/clang/test/Analysis/constant-folding.c
index 5429f3c740e1..1fdd474dc90a 100644
--- a/clang/test/Analysis/constant-folding.c
+++ b/clang/test/Analysis/constant-folding.c
@@ -77,7 +77,7 @@ void testMixedTypeComparisons (char a, unsigned long b) {
   clang_analyzer_eval(a != b); // expected-warning{{TRUE}}
 }
 
-void testBitwiseRules(unsigned int a, int b) {
+void testBitwiseRules(unsigned int a, int b, int c) {
   clang_analyzer_eval((a | 1) >= 1); // expected-warning{{TRUE}}
   clang_analyzer_eval((a | -1) >= -1); // expected-warning{{TRUE}}
   clang_analyzer_eval((a | 2) >= 2); // expected-warning{{TRUE}}
@@ -96,9 +96,9 @@ void testBitwiseRules(unsigned int a, int b) {
   // Again, check for 
diff erent argument order.
   clang_analyzer_eval((1 & a) <= 1); // expected-warning{{TRUE}}
 
-  unsigned int c = a;
-  c |= 1;
-  clang_analyzer_eval((c | 0) == 0); // expected-warning{{FALSE}}
+  unsigned int d = a;
+  d |= 1;
+  clang_analyzer_eval((d | 0) == 0); // expected-warning{{FALSE}}
 
   // Rules don't apply to signed typed, as the values might be negative.
   clang_analyzer_eval((b | 1) > 0); // expected-warning{{UNKNOWN}}
@@ -108,20 +108,47 @@ void testBitwiseRules(unsigned int a, int b) {
   clang_analyzer_eval((b | -2) == 0); // expected-warning{{FALSE}}
   clang_analyzer_eval((b | 10) == 0); // expected-warning{{FALSE}}
   clang_analyzer_eval((b | 0) == 0); // expected-warning{{UNKNOWN}}
-#ifdef ANALYZER_CM_Z3
   clang_analyzer_eval((b | -2) >= 0); // expected-warning{{FALSE}}
-#else
-  clang_analyzer_eval((b | -2) >= 0); // expected-warning{{UNKNOWN}}
-#endif
+
+  // Check that we can operate with negative ranges
+  if (b < 0) {
+    clang_analyzer_eval((b | -1) == -1);   // expected-warning{{TRUE}}
+    clang_analyzer_eval((b | -10) >= -10); // expected-warning{{TRUE}}
+
+    int e = (b | -5);
+    clang_analyzer_eval(e >= -5 && e <= -1); // expected-warning{{TRUE}}
+
+    if (b < -20) {
+      clang_analyzer_eval((b | e) >= -5); // expected-warning{{TRUE}}
+    }
+
+    // Check that we can reason about the result even if know nothing
+    // about one of the operands.
+    clang_analyzer_eval((b | c) != 0); // expected-warning{{TRUE}}
+  }
+
+  if (a <= 30 && b >= 10 && c >= 20) {
+    // Check that we can reason about non-constant operands.
+    clang_analyzer_eval((b | c) >= 20); // expected-warning{{TRUE}}
+
+    // Check that we can reason about the resulting range even if
+    // the types are not the same, but we still can convert operand
+    // ranges.
+    clang_analyzer_eval((a | b) >= 10); // expected-warning{{TRUE}}
+  }
 
   // Check that dynamically computed constants also work.
   unsigned int constant = 1 << 3;
-  unsigned int d = a | constant;
-  clang_analyzer_eval(d >= constant); // expected-warning{{TRUE}}
+  unsigned int f = a | constant;
+  clang_analyzer_eval(f >= constant); // expected-warning{{TRUE}}
 
   // Check that nested expressions also work.
   clang_analyzer_eval(((a | 10) | 5) >= 10); // expected-warning{{TRUE}}
 
+  if (a < 10) {
+    clang_analyzer_eval((a | 20) >= 20); // expected-warning{{TRUE}}
+  }
+
   // TODO: We misuse intersection of ranges for bitwise AND and OR operators.
   //       Resulting ranges for the following cases are infeasible.
   //       This is what causes paradoxical results below.
@@ -129,8 +156,4 @@ void testBitwiseRules(unsigned int a, int b) {
     clang_analyzer_eval((a & 1) <= 1); // expected-warning{{FALSE}}
     clang_analyzer_eval((a & 1) > 1);  // expected-warning{{FALSE}}
   }
-  if (a < 10) {
-    clang_analyzer_eval((a | 20) >= 20); // expected-warning{{FALSE}}
-    clang_analyzer_eval((a | 20) < 20);  // expected-warning{{FALSE}}
-  }
 }