[clang] [analyzer] Reduce constraint on modulo with small concrete range (PR #65448)
Ding Fei via cfe-commits
cfe-commits at lists.llvm.org
Fri Sep 15 10:47:17 PDT 2023
https://github.com/danix800 updated https://github.com/llvm/llvm-project/pull/65448
>From 111de0e99482cf8c8c206286bb9a4f40149992c6 Mon Sep 17 00:00:00 2001
From: dingfei <fding at feysh.com>
Date: Wed, 6 Sep 2023 10:03:21 +0800
Subject: [PATCH] [analyzer] Improve constraint inferring on concrete div/mod
1. Reduce constraint on modulo with small concrete range when assigned;
2. Improve constraint inferring on modulo over concrete value;
3. Improve constraint inferring on division over concrete value.
Fixes #54377
---
.../Core/RangeConstraintManager.cpp | 157 ++++++++++++++++++
clang/test/Analysis/constraint-assignor.c | 108 ++++++++++++
clang/test/Analysis/constraint-infer-div.c | 64 +++++++
clang/test/Analysis/constraint-infer-mod.c | 98 +++++++++++
4 files changed, 427 insertions(+)
create mode 100644 clang/test/Analysis/constraint-infer-div.c
create mode 100644 clang/test/Analysis/constraint-infer-mod.c
diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
index 5de99384449a4c8..15f5246335d9d26 100644
--- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
@@ -1338,6 +1338,9 @@ class SymbolicRangeInferrer
RangeSet VisitBinaryOperator(RangeSet LHS, BinaryOperator::Opcode Op,
RangeSet RHS, QualType T);
+ RangeSet handleConcreteModulo(Range LHS, llvm::APSInt Modulo, QualType T);
+ RangeSet handleRangeModulo(Range LHS, Range RHS, QualType T);
+
//===----------------------------------------------------------------------===//
// Ranges and operators
//===----------------------------------------------------------------------===//
@@ -1771,6 +1774,14 @@ template <>
RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Rem>(Range LHS,
Range RHS,
QualType T) {
+ if (const llvm::APSInt *ModOrNull = RHS.getConcreteValue())
+ return handleConcreteModulo(LHS, *ModOrNull, T);
+
+ return handleRangeModulo(LHS, RHS, T);
+}
+
+RangeSet SymbolicRangeInferrer::handleRangeModulo(Range LHS, Range RHS,
+ QualType T) {
llvm::APSInt Zero = ValueFactory.getAPSIntType(T).getZeroValue();
Range ConservativeRange = getSymmetricalRange(RHS, T);
@@ -1824,6 +1835,101 @@ RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Rem>(Range LHS,
return {RangeFactory, ValueFactory.getValue(Min), ValueFactory.getValue(Max)};
}
+RangeSet SymbolicRangeInferrer::handleConcreteModulo(Range LHS,
+ llvm::APSInt Modulo,
+ QualType T) {
+ APSIntType ResultType = ValueFactory.getAPSIntType(T);
+ llvm::APSInt Zero = ResultType.getZeroValue();
+ llvm::APSInt One = ResultType.getValue(1);
+
+ if (Modulo == Zero)
+ return RangeFactory.getEmptySet();
+
+ // Quick path, this also avoids unary '-' overflow on MinValue modulo.
+ if (Modulo == ValueFactory.getMinValue(T) ||
+ Modulo == ValueFactory.getMaxValue(T))
+ return RangeFactory.getRangeSet(LHS);
+
+ // Normalize to positive modulo since a % N == a % -N
+ if (Modulo < 0)
+ Modulo = -Modulo;
+
+ auto ComputeModuloN = [&](llvm::APSInt From, llvm::APSInt To,
+ llvm::APSInt N) -> RangeSet {
+ assert(N > Zero && "Positive N expected!");
+ bool NonNegative = From >= Zero;
+ assert(NonNegative == (To >= Zero) && "Signedness mismatch!");
+
+ if (From > To)
+ return RangeFactory.getEmptySet();
+
+ llvm::APSInt N1 = N - One;
+
+ // spans [0, N - 1] if NonNegative or [-(N - 1), 0] otherwise.
+ if ((To - From) / N > Zero)
+ return {RangeFactory, ValueFactory.getValue(NonNegative ? Zero : -N1),
+ ValueFactory.getValue(NonNegative ? N1 : Zero)};
+
+ llvm::APSInt Min = From % N;
+ llvm::APSInt Max = To % N;
+
+ if (Min < Max) // [Min, Max]
+ return {RangeFactory, ValueFactory.getValue(Min),
+ ValueFactory.getValue(Max)};
+
+ // [0, Max] U [Min, N - 1] if NonNegative, or
+ // [-(N - 1), Max] U [Min, 0] otherwise.
+ const llvm::APSInt &Min1 = ValueFactory.getValue(NonNegative ? Zero : -N1);
+ const llvm::APSInt &Max1 = ValueFactory.getValue(Max);
+ RangeSet RS1{RangeFactory, Min1, Max1};
+
+ const llvm::APSInt &Min2 = ValueFactory.getValue(Min);
+ const llvm::APSInt &Max2 = ValueFactory.getValue(NonNegative ? N1 : Zero);
+ RangeSet RS2{RangeFactory, Min2, Max2};
+
+ return RangeFactory.unite(RS1, RS2);
+ };
+
+ if (!LHS.Includes(Zero))
+ return ComputeModuloN(LHS.From(), LHS.To(), Modulo);
+
+ // split over [From, -1] U [0, To] for easy handling.
+ return RangeFactory.unite(ComputeModuloN(LHS.From(), -One, Modulo),
+ ComputeModuloN(Zero, LHS.To(), Modulo));
+}
+
+template <>
+RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Div>(Range LHS,
+ Range RHS,
+ QualType T) {
+ const llvm::APSInt *DividerOrNull = RHS.getConcreteValue();
+ if (!DividerOrNull)
+ return infer(T);
+
+ const llvm::APSInt &D = *DividerOrNull;
+
+ APSIntType ResultType = ValueFactory.getAPSIntType(T);
+ llvm::APSInt Zero = ResultType.getZeroValue();
+ llvm::APSInt One = ResultType.getValue(1);
+ if (D == Zero)
+ return RangeFactory.getEmptySet();
+
+ if (D == -One) { // might overflow
+ if (LHS.To().isMinSignedValue())
+ return {RangeFactory, LHS.To(), LHS.To()};
+ else if (LHS.From().isMinSignedValue()) {
+ const llvm::APSInt &From = ValueFactory.getValue((LHS.From() + One) / D);
+ const llvm::APSInt &To = ValueFactory.getValue(LHS.To() / D);
+ RangeSet RS{RangeFactory, To, From};
+ return RangeFactory.unite(RS, LHS.From());
+ }
+ }
+
+ const llvm::APSInt &From = ValueFactory.getValue(LHS.From() / D);
+ const llvm::APSInt &To = ValueFactory.getValue(LHS.To() / D);
+ return {RangeFactory, D < Zero ? To : From, D < Zero ? From : To};
+}
+
RangeSet SymbolicRangeInferrer::VisitBinaryOperator(RangeSet LHS,
BinaryOperator::Opcode Op,
RangeSet RHS, QualType T) {
@@ -1842,6 +1948,8 @@ RangeSet SymbolicRangeInferrer::VisitBinaryOperator(RangeSet LHS,
return VisitBinaryOperator<BO_And>(LHS, RHS, T);
case BO_Rem:
return VisitBinaryOperator<BO_Rem>(LHS, RHS, T);
+ case BO_Div:
+ return VisitBinaryOperator<BO_Div>(LHS, RHS, T);
default:
return infer(T);
}
@@ -2073,6 +2181,14 @@ class ConstraintAssignor : public ConstraintAssignorBase<ConstraintAssignor> {
if (Sym->getOpcode() != BO_Rem)
return true;
// a % b != 0 implies that a != 0.
+ // Z3 verification:
+ // (declare-const a Int)
+ // (declare-const m Int)
+ // (assert (not (= m 0)))
+ // (assert (not (= (mod a m) 0)))
+ // (assert (= a 0))
+ // (check-sat)
+ // ; unsat
if (!Constraint.containsZero()) {
SVal SymSVal = Builder.makeSymbolVal(Sym->getLHS());
if (auto NonLocSymSVal = SymSVal.getAs<nonloc::SymbolVal>()) {
@@ -2080,6 +2196,47 @@ class ConstraintAssignor : public ConstraintAssignorBase<ConstraintAssignor> {
if (!State)
return false;
}
+ } else if (const auto *SIE = dyn_cast<SymIntExpr>(Sym);
+ SIE && Constraint.encodesFalseRange()) {
+ // a % m == 0 && a in [x, y] && y - x < m implies that
+ // a = (y < 0 ? x : y) / m * m which is a 'ConcreteInt'
+ // where x and m are 'ConcreteInt'.
+ //
+ // Z3 verification:
+ // (declare-const a Int)
+ // (declare-const m Int)
+ // (declare-const x Int)
+ // (declare-const y Int)
+ // (assert (= (mod a m) 0))
+ // (assert (< (- y x) m))
+ // (assert (and (>= a x) (<= a y)))
+ // (assert (not (= a (* (div y m) m))))
+ // (check-sat)
+ // ; unsat
+ BasicValueFactory &ValueFactory = RangeFactory.getValueFactory();
+ APSIntType ResultType = ValueFactory.getAPSIntType(SIE->getType());
+ llvm::APSInt N = SIE->getRHS();
+ if (N < 0)
+ N = -N;
+ N = ResultType.convert(N);
+
+ SymbolRef Sym = SIE->getLHS();
+ RangeSet RS = SymbolicRangeInferrer::inferRange(RangeFactory, State, Sym);
+ if (RS.size() == 1) {
+ Range R = *RS.begin();
+ llvm::APSInt Distance = ResultType.convert(R.To()).extend(64) -
+ ResultType.convert(R.From()).extend(64);
+ if (Distance < 0) // overflows
+ return true;
+
+ if (Distance < N.extend(64)) {
+ const llvm::APSInt &Point = ValueFactory.getValue(
+ (ResultType.convert(R.To() > 0 ? R.To() : R.From())) / N * N);
+ State = assign(Sym, {RangeFactory, Point, Point});
+ if (!State)
+ return false;
+ }
+ }
}
return true;
}
diff --git a/clang/test/Analysis/constraint-assignor.c b/clang/test/Analysis/constraint-assignor.c
index 8210e576c98bd21..8af3dd33653773d 100644
--- a/clang/test/Analysis/constraint-assignor.c
+++ b/clang/test/Analysis/constraint-assignor.c
@@ -5,6 +5,7 @@
void clang_analyzer_warnIfReached(void);
void clang_analyzer_eval(int);
+void clang_analyzer_dump(int);
void rem_constant_rhs_ne_zero(int x, int y) {
if (x % 3 == 0) // x % 3 != 0 -> x != 0
@@ -82,3 +83,110 @@ void remainder_with_adjustment_of_composit_lhs(int x, int y) {
clang_analyzer_eval(x + y != -1); // expected-warning{{TRUE}}
(void)(x * y); // keep the constraints alive.
}
+
+void remainder_infeasible_positive_range(int x) {
+ if (x <= 2 || x >= 5)
+ return;
+ if (x % 5 != 0)
+ return;
+ clang_analyzer_warnIfReached(); // no-warning
+ (void)x; // keep the constraints alive.
+}
+
+void remainder_infeasible_negative_range(int x) {
+ if (x <= -14 || x >= -1)
+ return;
+ if (x % 15 != 0)
+ return;
+ clang_analyzer_warnIfReached(); // no-warning
+ (void)x; // keep the constraints alive.
+}
+
+void remainder_within_modulo_positive_range_unsigned_1(unsigned x) {
+ if (x <= 2 || x > 6)
+ return;
+ if (x % 5 != 0)
+ return;
+ clang_analyzer_dump(x); // expected-warning{{5 S32b}}
+ (void)x; // keep the constraints alive.
+}
+
+void remainder_within_modulo_positive_range_unsigned_2(unsigned char x) {
+ if (x < 252 || x > 254)
+ return;
+ if (x % 5 != 0)
+ return;
+ clang_analyzer_dump(x); // no-warning
+ (void)x; // keep the constraints alive.
+}
+
+void remainder_within_modulo_positive_range_unsigned_3(unsigned x) {
+ if (x < 4294967289 || x > 4294967294)
+ return;
+ if (x % 10 != 0)
+ return;
+ clang_analyzer_eval(x == 4294967290); // expected-warning{{TRUE}}
+ (void)x; // keep the constraints alive.
+}
+
+void remainder_within_modulo_positive_range(int x) {
+ if (x <= 2 || x > 6)
+ return;
+ if (x % 5 != 0)
+ return;
+ clang_analyzer_dump(x); // expected-warning{{5 S32b}}
+ (void)x; // keep the constraints alive.
+}
+
+void remainder_within_modulo_range_spans_zero(int x) {
+ if (x <= -2 || x > 2)
+ return;
+ if (x % 5 != 0)
+ return;
+ clang_analyzer_dump(x); // expected-warning{{0 S32b}}
+ (void)x; // keep the constraints alive.
+}
+
+void remainder_within_modulo_negative_range(int x) {
+ if (x <= -7 || x > -2)
+ return;
+ if (x % 5 != 0)
+ return;
+ clang_analyzer_dump(x); // expected-warning{{-5 S32b}}
+ (void)x; // keep the constraints alive.
+}
+
+void remainder_within_modulo_range_neg_mod(int x) {
+ if (x <= 2 || x > 6)
+ return;
+ if (x % -5 != 0)
+ return;
+ clang_analyzer_dump(x); // expected-warning{{5 S32b}}
+ (void)x; // keep the constraints alive.
+}
+
+#define LONG_MAX 0x7fffffffffffffffLL
+#define LONG_MIN (-LONG_MAX - 1LL)
+
+void remainder_within_modulo_distance_overflow(long long x) {
+ if (x < LONG_MIN + 1 || x > LONG_MAX - 1)
+ return;
+
+ if (x % 10 != 0)
+ return;
+ clang_analyzer_dump(x); // expected-warning{{reg_$0<long long x>}}
+ (void)x; // keep the constraints alive.
+}
+
+#define CHAR_MAX 0x7f
+#define CHAR_MIN (-CHAR_MAX - 1)
+
+void remainder_within_modulo_not_overflow(char x) {
+ if (x < CHAR_MIN + 1 || x > CHAR_MAX - 1)
+ return;
+
+ if (x % (CHAR_MAX * 2) != 0)
+ return;
+ clang_analyzer_dump(x); // expected-warning{{0 S32b}}
+ (void)x; // keep the constraints alive.
+}
diff --git a/clang/test/Analysis/constraint-infer-div.c b/clang/test/Analysis/constraint-infer-div.c
new file mode 100644
index 000000000000000..1ff120fba16e0df
--- /dev/null
+++ b/clang/test/Analysis/constraint-infer-div.c
@@ -0,0 +1,64 @@
+// RUN: %clang_analyze_cc1 %s \
+// RUN: -analyzer-checker=core \
+// RUN: -analyzer-checker=debug.ExprInspection \
+// RUN: -verify
+
+void clang_analyzer_eval(int);
+
+void div_infer_positive_divider(int x) {
+ if (x < 1 || x > 5)
+ return;
+
+ int i = x / 2;
+ clang_analyzer_eval(i >= 0 && i <= 2); // expected-warning{{TRUE}}
+}
+
+void div_infer_negative_divider_positive_range(int x) {
+ if (x < 1 || x > 2)
+ return;
+
+ int i = x / -2;
+ clang_analyzer_eval(i >= -1 && i <= 0); // expected-warning{{TRUE}}
+}
+
+void div_infer_negative_divider_negative_range(int x) {
+ if (x < -2 || x > 0)
+ return;
+
+ int i = x / -2;
+ clang_analyzer_eval(i >= 0 && i <= 1); // expected-warning{{TRUE}}
+}
+
+void div_infer_positive_divider_positive_range(int x) {
+ if (x < 0 || x > 5)
+ return;
+
+ int i = x / 6;
+ clang_analyzer_eval(i == 0); // expected-warning{{TRUE}}
+}
+
+#define LONG_MAX 0x7fffffffffffffffLL
+#define LONG_MIN (-LONG_MAX - 1LL)
+
+void div_infer_overflow_long(long long x) {
+ if (x > LONG_MIN + 1)
+ return;
+
+ // x in [LONG_MIN, LONG_MIN + 1]
+ clang_analyzer_eval(x >= LONG_MIN && x <= LONG_MIN + 1); // expected-warning{{TRUE}}
+ long long i = x / -1;
+ clang_analyzer_eval(i == LONG_MIN || i == LONG_MAX); // expected-warning{{TRUE}}
+}
+
+#define INT_MAX 0x7fffffff
+#define INT_MIN (-INT_MAX - 1)
+
+void div_infer_overflow_int(int x) {
+ if (x > INT_MIN + 1)
+ return;
+
+ // x in [INT_MIN, INT_MIN + 1]
+ clang_analyzer_eval(x >= INT_MIN && x <= INT_MIN + 1); // expected-warning{{TRUE}}
+ int i = x / -1;
+ clang_analyzer_eval(i == INT_MIN || i == INT_MAX); // expected-warning{{TRUE}}
+}
diff --git a/clang/test/Analysis/constraint-infer-mod.c b/clang/test/Analysis/constraint-infer-mod.c
new file mode 100644
index 000000000000000..2dff3667d64adfc
--- /dev/null
+++ b/clang/test/Analysis/constraint-infer-mod.c
@@ -0,0 +1,98 @@
+// RUN: %clang_analyze_cc1 %s \
+// RUN: -analyzer-checker=core \
+// RUN: -analyzer-checker=debug.ExprInspection \
+// RUN: -verify
+
+void clang_analyzer_warnIfReached(void);
+void clang_analyzer_eval(int);
+void clang_analyzer_dump(int);
+
+void remainder_infer_positive_range(int x) {
+ if (x < 5 || x > 7)
+ return;
+
+ int y = x % 16;
+ clang_analyzer_eval(y >= 5 && y <= 7); // expected-warning{{TRUE}}
+ (void)x; // keep the constraints alive.
+}
+
+void remainder_infer_positive_range_full(int x) {
+ if (x < 9 || x > 51)
+ return;
+
+ int y = x % 10;
+ clang_analyzer_eval(y >= 0 && y <= 9); // expected-warning{{TRUE}}
+ (void)x; // keep the constraints alive.
+}
+
+void remainder_infer_negative_range(int x) {
+ if (x < -7 || x > -5)
+ return;
+
+ int y = x % 16;
+ clang_analyzer_eval(y >= -7 && y <= -5); // expected-warning{{TRUE}}
+ (void)x; // keep the constraints alive.
+}
+
+void remainder_infer_positive_range_wraparound(int x) {
+ if (x < 30 || x > 33)
+ return;
+
+ int y = x % 16;
+ clang_analyzer_eval((y >= 0 && y <= 1) || (y >= 14 && y <= 15)); // expected-warning{{TRUE}}
+ (void)x; // keep the constraints alive.
+}
+
+void remainder_infer_negative_range_wraparound(int x) {
+ if (x < -33 || x > -30)
+ return;
+
+ int y = x % 16;
+ clang_analyzer_eval((y >= -1 && y <= 0) || (y >= -15 && y <= -14)); // expected-warning{{TRUE}}
+ (void)x; // keep the constraints alive.
+}
+
+void remainder_infer_range_spans_zero(int x) {
+ if (x < -7 || x > 5)
+ return;
+
+ int y = x % 10;
+ clang_analyzer_eval(y >= -7 && y <= 5); // expected-warning{{TRUE}}
+ (void)x; // keep the constraints alive.
+}
+
+void remainder_infer_mod_one(long long x) {
+ long long y = x % 1;
+ clang_analyzer_eval(y == 0); // expected-warning{{TRUE}}
+ (void)x; // keep the constraints alive.
+}
+
+void remainder_infer_mod_minus_one(long long x) {
+ long long y = x % -1;
+ clang_analyzer_eval(y == 0); // expected-warning{{TRUE}}
+ (void)x; // keep the constraints alive.
+}
+
+#define LONG_MAX 0x7fffffffffffffffL
+#define LONG_MIN (-LONG_MAX - 1L)
+
+void remainder_infer_range_mod_long_min(long x) {
+ if (x < -7 || x > 5)
+ return;
+
+ long y = x % LONG_MIN;
+ clang_analyzer_eval(y >= -7 && y <= 5); // expected-warning{{TRUE}}
+ (void)x; // keep the constraints alive.
+}
+
+#define INT_MAX 0x7fffffff
+#define INT_MIN (-INT_MAX - 1)
+
+void remainder_infer_range_mod_int_min(long x) {
+ if (x < -7 || x > 5)
+ return;
+
+ int y = x % INT_MIN;
+ clang_analyzer_eval(y >= -7 && y <= 5); // expected-warning{{TRUE}}
+ (void)x; // keep the constraints alive.
+}
More information about the cfe-commits
mailing list