[clang] WIP Add suppress weak loop assumptions visitor (PR #111258)
Balazs Benics via cfe-commits
cfe-commits at lists.llvm.org
Sat Oct 5 08:11:27 PDT 2024
https://github.com/steakhal created https://github.com/llvm/llvm-project/pull/111258
This is an alternative implementation of the idea present in #109804.
In this implementation, the suppression is purely implemented by a `BugReportVisitor`, to avoid coupling the suppression with the engine itself.
The idea is to visit the bug-path, and along the way record each time we entered the basic-block that has a comparison in the end that drives the decision of entering or avoiding a loop construct.
We also record expressions that caused a state-split. These are basically the expressions that eagerly assume would force a split on - but this approach works even if eager-assumptions are disabled.
Once we collected all the data, we go over the loops that we saw, and check if the condition of that loop construct was evaluated N times. This then triggers a heuristic to decide if we should suppress this, e.g. because we "assumed" not to enter the loop, or "assumed" to iterate more than 2 times in the loop.
---
This is WIP, because I just scratched the idea, but it seems to work good enough to have some discussion about this, while refining it if I have some time later.
>From c429df63af6cdeec7e721e8f9c322f1ac6ba3b3d Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Sat, 5 Oct 2024 15:14:37 +0200
Subject: [PATCH 1/5] Use the eager assumption tags only if the decision was
ambiguous
---
clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 15 ++++++++----
clang/test/Analysis/assuming-unsigned-ge-0.c | 5 +---
clang/test/Analysis/cast-value-notes.cpp | 20 +++++++---------
clang/test/Analysis/cast-value-state-dump.cpp | 5 ++--
.../std-c-library-functions-arg-constraints.c | 24 +++++++------------
5 files changed, 29 insertions(+), 40 deletions(-)
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index b1919d7027cf4d..62066632db87d0 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -3771,24 +3771,29 @@ void ExprEngine::evalEagerlyAssumeBinOpBifurcation(ExplodedNodeSet &Dst,
SVal V = state->getSVal(Ex, Pred->getLocationContext());
std::optional<nonloc::SymbolVal> SEV = V.getAs<nonloc::SymbolVal>();
if (SEV && SEV->isExpression()) {
- const std::pair<const ProgramPointTag *, const ProgramPointTag*> &tags =
- geteagerlyAssumeBinOpBifurcationTags();
-
ProgramStateRef StateTrue, StateFalse;
std::tie(StateTrue, StateFalse) = state->assume(*SEV);
+ const ProgramPointTag *EagerTrueTag = nullptr;
+ const ProgramPointTag *EagerFalseTag = nullptr;
+
+ // Use the eager assumption tags if the choice was ambiguous.
+ if (StateTrue && StateFalse)
+ std::tie(EagerTrueTag, EagerFalseTag) =
+ geteagerlyAssumeBinOpBifurcationTags();
+
// First assume that the condition is true.
if (StateTrue) {
SVal Val = svalBuilder.makeIntVal(1U, Ex->getType());
StateTrue = StateTrue->BindExpr(Ex, Pred->getLocationContext(), Val);
- Bldr.generateNode(Ex, Pred, StateTrue, tags.first);
+ Bldr.generateNode(Ex, Pred, StateTrue, EagerTrueTag);
}
// Next, assume that the condition is false.
if (StateFalse) {
SVal Val = svalBuilder.makeIntVal(0U, Ex->getType());
StateFalse = StateFalse->BindExpr(Ex, Pred->getLocationContext(), Val);
- Bldr.generateNode(Ex, Pred, StateFalse, tags.second);
+ Bldr.generateNode(Ex, Pred, StateFalse, EagerFalseTag);
}
}
}
diff --git a/clang/test/Analysis/assuming-unsigned-ge-0.c b/clang/test/Analysis/assuming-unsigned-ge-0.c
index 553e68cb96c6bd..18e637a6e5310e 100644
--- a/clang/test/Analysis/assuming-unsigned-ge-0.c
+++ b/clang/test/Analysis/assuming-unsigned-ge-0.c
@@ -2,10 +2,7 @@
// RUN: -analyzer-checker=core -verify %s
int assuming_unsigned_ge_0(unsigned arg) {
- // TODO This testcase demonstrates the current incorrect behavior of Clang
- // Static Analyzer: here 'arg' is unsigned, so "arg >= 0" is not a fresh
- // assumption, but it still appears in the diagnostics as if it's fresh:
- // expected-note at +2 {{Assuming 'arg' is >= 0}}
+ // expected-note at +2 {{'arg' is >= 0}}
// expected-note at +1 {{Taking false branch}}
if (arg < 0)
return 0;
diff --git a/clang/test/Analysis/cast-value-notes.cpp b/clang/test/Analysis/cast-value-notes.cpp
index 7ee224dc6e5d8f..0235404e73adf6 100644
--- a/clang/test/Analysis/cast-value-notes.cpp
+++ b/clang/test/Analysis/cast-value-notes.cpp
@@ -168,9 +168,8 @@ void evalNonNullParamNonNullReturnReference(const Shape &S) {
// expected-note at -2 {{Taking true branch}}
(void)(1 / !C);
- // expected-note at -1 {{'C' is non-null}}
- // expected-note at -2 {{Division by zero}}
- // expected-warning at -3 {{Division by zero}}
+ // expected-note at -1 {{Division by zero}}
+ // expected-warning at -2 {{Division by zero}}
}
}
@@ -226,9 +225,8 @@ void evalNonNullParamNonNullReturn(const Shape *S) {
// expected-note at -2 {{Taking true branch}}
(void)(1 / !C);
- // expected-note at -1 {{'C' is non-null}}
- // expected-note at -2 {{Division by zero}}
- // expected-warning at -3 {{Division by zero}}
+ // expected-note at -1 {{Division by zero}}
+ // expected-warning at -2 {{Division by zero}}
}
}
@@ -243,9 +241,8 @@ void evalNonNullParamNullReturn(const Shape *S) {
// expected-note at -4 {{Taking true branch}}
(void)(1 / !T);
- // expected-note at -1 {{'T' is non-null}}
- // expected-note at -2 {{Division by zero}}
- // expected-warning at -3 {{Division by zero}}
+ // expected-note at -1 {{Division by zero}}
+ // expected-warning at -2 {{Division by zero}}
}
}
@@ -265,9 +262,8 @@ void evalZeroParamNonNullReturnPointer(const Shape *S) {
// expected-note at -2 {{'C' initialized here}}
(void)(1 / !C);
- // expected-note at -1 {{'C' is non-null}}
- // expected-note at -2 {{Division by zero}}
- // expected-warning at -3 {{Division by zero}}
+ // expected-note at -1 {{Division by zero}}
+ // expected-warning at -2 {{Division by zero}}
}
void evalZeroParamNonNullReturn(const Shape &S) {
diff --git a/clang/test/Analysis/cast-value-state-dump.cpp b/clang/test/Analysis/cast-value-state-dump.cpp
index 07fd7abd848ab7..97f3b2236db087 100644
--- a/clang/test/Analysis/cast-value-state-dump.cpp
+++ b/clang/test/Analysis/cast-value-state-dump.cpp
@@ -40,8 +40,7 @@ void evalNonNullParamNonNullReturn(const Shape *S) {
// CHECK-NEXT: ] }
(void)(1 / !C);
- // expected-note at -1 {{'C' is non-null}}
- // expected-note at -2 {{Division by zero}}
- // expected-warning at -3 {{Division by zero}}
+ // expected-note at -1 {{Division by zero}}
+ // expected-warning at -2 {{Division by zero}}
}
diff --git a/clang/test/Analysis/std-c-library-functions-arg-constraints.c b/clang/test/Analysis/std-c-library-functions-arg-constraints.c
index 0b817dda98c727..a6e0ad15129f60 100644
--- a/clang/test/Analysis/std-c-library-functions-arg-constraints.c
+++ b/clang/test/Analysis/std-c-library-functions-arg-constraints.c
@@ -42,8 +42,7 @@ void test_alnum_symbolic(int x) {
// report-warning{{TRUE}} \
// bugpath-warning{{TRUE}} \
// bugpath-note{{TRUE}} \
- // bugpath-note{{Left side of '&&' is true}} \
- // bugpath-note{{'x' is <= 255}}
+ // bugpath-note{{Left side of '&&' is true}}
}
void test_alnum_symbolic2(int x) {
@@ -76,8 +75,7 @@ void test_toupper_symbolic(int x) {
// report-warning{{TRUE}} \
// bugpath-warning{{TRUE}} \
// bugpath-note{{TRUE}} \
- // bugpath-note{{Left side of '&&' is true}} \
- // bugpath-note{{'x' is <= 255}}
+ // bugpath-note{{Left side of '&&' is true}}
}
void test_toupper_symbolic2(int x) {
@@ -110,8 +108,7 @@ void test_tolower_symbolic(int x) {
// report-warning{{TRUE}} \
// bugpath-warning{{TRUE}} \
// bugpath-note{{TRUE}} \
- // bugpath-note{{Left side of '&&' is true}} \
- // bugpath-note{{'x' is <= 255}}
+ // bugpath-note{{Left side of '&&' is true}}
}
void test_tolower_symbolic2(int x) {
@@ -144,8 +141,7 @@ void test_toascii_symbolic(int x) {
// report-warning{{TRUE}} \
// bugpath-warning{{TRUE}} \
// bugpath-note{{TRUE}} \
- // bugpath-note{{Left side of '&&' is true}} \
- // bugpath-note{{'x' is <= 255}}
+ // bugpath-note{{Left side of '&&' is true}}
}
void test_toascii_symbolic2(int x) {
@@ -173,8 +169,7 @@ void test_notnull_symbolic(FILE *fp, int *buf) {
clang_analyzer_eval(buf != 0); // \
// report-warning{{TRUE}} \
// bugpath-warning{{TRUE}} \
- // bugpath-note{{TRUE}} \
- // bugpath-note{{'buf' is not equal to null}}
+ // bugpath-note{{TRUE}}
}
void test_notnull_symbolic2(FILE *fp, int *buf) {
if (!buf) // bugpath-note{{Assuming 'buf' is null}} \
@@ -218,8 +213,7 @@ void test_notnull_buffer_3(void *buf) {
clang_analyzer_eval(buf != 0); // \
// report-warning{{TRUE}} \
// bugpath-warning{{TRUE}} \
- // bugpath-note{{TRUE}} \
- // bugpath-note{{'buf' is not equal to null}}
+ // bugpath-note{{TRUE}}
}
void test_no_node_after_bug(FILE *fp, size_t size, size_t n, void *buf) {
@@ -299,8 +293,7 @@ void test_buf_size_symbolic(int s) {
clang_analyzer_eval(s <= 3); // \
// report-warning{{TRUE}} \
// bugpath-warning{{TRUE}} \
- // bugpath-note{{TRUE}} \
- // bugpath-note{{'s' is <= 3}}
+ // bugpath-note{{TRUE}}
}
void test_buf_size_symbolic_and_offset(int s) {
char buf[3];
@@ -308,8 +301,7 @@ void test_buf_size_symbolic_and_offset(int s) {
clang_analyzer_eval(s <= 2); // \
// report-warning{{TRUE}} \
// bugpath-warning{{TRUE}} \
- // bugpath-note{{TRUE}} \
- // bugpath-note{{'s' is <= 2}}
+ // bugpath-note{{TRUE}}
}
int __buf_size_arg_constraint_mul(const void *, size_t, size_t);
>From c240537d15f1275a4c587ace596682edf77ff5d6 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Sat, 5 Oct 2024 15:15:46 +0200
Subject: [PATCH 2/5] Add test from PR #109804
---
clang/test/Analysis/out-of-bounds.c | 150 +++++++++++++++++++++++++++-
1 file changed, 149 insertions(+), 1 deletion(-)
diff --git a/clang/test/Analysis/out-of-bounds.c b/clang/test/Analysis/out-of-bounds.c
index 1f771c2b3bd138..fde53d148ba442 100644
--- a/clang/test/Analysis/out-of-bounds.c
+++ b/clang/test/Analysis/out-of-bounds.c
@@ -1,4 +1,9 @@
-// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-checker=core,alpha.security.ArrayBoundV2,debug.ExprInspection -verify %s
+// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-checker=core,alpha.security.ArrayBoundV2,debug.ExprInspection -verify=expected,eagerlyassume %s
+// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-checker=core,alpha.security.ArrayBoundV2,debug.ExprInspection -analyzer-config eagerly-assume=false -verify %s
+
+// Note that eagerly-assume=false is tested separately because the
+// WeakLoopAssumption suppression heuristic uses different code paths to
+// achieve the same result with and without eagerly-assume.
void clang_analyzer_eval(int);
@@ -194,3 +199,146 @@ char test_comparison_with_extent_symbol(struct incomplete *p) {
return ((char *)p)[-1]; // no-warning
}
+// WeakLoopAssumption suppression
+///////////////////////////////////////////////////////////////////////
+
+int GlobalArray[100];
+int loop_suppress_after_zero_iterations(unsigned len) {
+ for (unsigned i = 0; i < len; i++)
+ if (GlobalArray[i] > 0)
+ return GlobalArray[i];
+ // Previously this would have produced an overflow warning because splitting
+ // the state on the loop condition introduced an execution path where the
+ // analyzer thinks that len == 0.
+ // There are many situations where the programmer knows that an argument is
+ // positive, but this is not indicated in the source code, so we must avoid
+ // reporting errors (especially out of bounds errors) on these branches,
+ // because otherwise we'd get prohibitively many false positives.
+ return GlobalArray[len - 1]; // no-warning
+}
+
+void loop_report_in_second_iteration(int len) {
+ int buf[1] = {0};
+ for (int i = 0; i < len; i++) {
+ // When a programmer writes a loop, we may assume that they intended at
+ // least two iterations.
+ buf[i] = 1; // expected-warning{{Out of bound access to memory}}
+ }
+}
+
+void loop_suppress_in_third_iteration(int len) {
+ int buf[2] = {0};
+ for (int i = 0; i < len; i++) {
+ // We should suppress array bounds errors on the third and later iterations
+ // of loops, because sometimes programmers write a loop in sitiuations
+ // where they know that there will be at most two iterations, but the
+ // analyzer cannot deduce this property.
+ buf[i] = 1; // no-warning
+ }
+}
+
+int no_suppression_when_no_assumption_zero_iterations(unsigned len) {
+ if (len != 0) {
+ // This 'if' introduces a state split between len == 0 and len != 0.
+ }
+
+ // On the branch where we assumed that len is zero, this loop will be
+ // skipped. We (intentionally) don't suppress this execution path becasue
+ // here the analyzer doesn't assume anything new when it evaluates the loop
+ // condition.
+ for (unsigned i = 0; i < len; i++)
+ if (GlobalArray[i] > 0)
+ return GlobalArray[i];
+
+ return GlobalArray[len - 1]; // expected-warning{{Out of bound access to memory}}
+}
+
+void no_suppression_when_no_assumption_third_iteration(int len) {
+ if (len < 20) {
+ // This 'if' introduces a state split with len >= 20 on one branch.
+ }
+
+ int buf[2] = {0};
+ for (int i = 0; i < len; i++) {
+ // As in no_suppression_when_no_assumption_zero_iterations, the suppression
+ // only activates when the analyzer assumes something new in the loop
+ // condition. On the branch where `len >= 20` entering the third iteration
+ // doesn't involve a new assumption, so this warning is not suppressed:
+ buf[i] = 1; // expected-warning{{Out of bound access to memory}}
+ }
+}
+
+void loop_suppress_in_third_iteration_logical_and(int len, int flag) {
+ int buf[2] = {0};
+ for (int i = 0; i < len && flag; i++) {
+ // FIXME: In this case the checker should suppress the warning the same way
+ // as it's suppressed in loop_suppress_in_third_iteration, but the
+ // suppression is not activated because the terminator statement associated
+ // with the loop is just the expression 'flag', while 'i < len' is a
+ // separate terminator statement that's associated with the
+ // short-circuiting operator '&&'.
+ // I have seen a real-world FP that looks like this, but it is much rarer
+ // than the basic setup.
+ buf[i] = 1; // expected-warning{{Out of bound access to memory}}
+ }
+}
+
+void loop_suppress_in_third_iteration_logical_and_2(int len, int flag) {
+ int buf[2] = {0};
+ for (int i = 0; flag && i < len; i++) {
+ // If the two operands of '&&' are flipped, the suppression works, because
+ // then 'flag' is the terminator statement associated with '&&' (which
+ // determines whether the short-circuiting happens or not) and 'i < len' is
+ // the terminator statement of the loop itself.
+ buf[i] = 1; // no-warning
+ }
+}
+
+void loop_suppress_in_third_iteration_cast(int len) {
+ int buf[2] = {0};
+ for (int i = 0; (unsigned)(i < len); i++) {
+ // The behavior of this suppression is slightly different under
+ // eagerly-assume=true (the default) and eagerly-assume=false:
+ // * When eager assumptions are disabled, it's enough to look for cases
+ // where we get two non-null states from splitting the state over the
+ // 'SVal' that represents the full loop condition.
+ // * When eager assumptions are enabled, we also accept situations when the
+ // loop condition expression triggers an eager state split and therefore
+ // we won't see a state split at the "normal" point because it's executed
+ // on two already separated execution paths.
+ // However, for the sake of simplicity we don't activate the suppression in
+ // cases when _a subexpression_ of the loop condition triggers an eager
+ // assumption. There are already many differences between analysis with and
+ // without eager assumptions, so it would be pointless to write more
+ // complicated code to eliminate these rare differences.
+ buf[i] = 1; // eagerlyassume-warning{{Out of bound access to memory}}
+ }
+}
+
+int coinflip(void);
+int do_while_report_after_one_iteration(void) {
+ int i = 0;
+ do {
+ i++;
+ } while (coinflip());
+ // Unlike `loop_suppress_after_zero_iterations`, running just one iteration
+ // in a do-while is not a corner case that would produce too many false
+ // positives, so don't suppress bounds errors in these situations.
+ return GlobalArray[i-2]; // expected-warning{{Out of bound access to memory}}
+}
+
+void do_while_report_in_second_iteration(int len) {
+ int buf[1] = {0};
+ int i = 0;
+ do {
+ buf[i] = 1; // expected-warning{{Out of bound access to memory}}
+ } while (i++ < len);
+}
+
+void do_while_suppress_in_third_iteration(int len) {
+ int buf[2] = {0};
+ int i = 0;
+ do {
+ buf[i] = 1; // no-warning
+ } while (i++ < len);
+}
>From 310bb3a6aa2894ab7303582ab0bd775fbf8405ed Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Sat, 5 Oct 2024 15:16:33 +0200
Subject: [PATCH 3/5] Add SuppressReportsHavingWeakLoopAssumption visitor
---
.../Checkers/ArrayBoundCheckerV2.cpp | 135 +++++++++++++++++-
1 file changed, 134 insertions(+), 1 deletion(-)
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index 3f837564cf47c4..3fcdbc6de6a068 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -11,10 +11,17 @@
//
//===----------------------------------------------------------------------===//
+#include "clang/AST/ASTContext.h"
#include "clang/AST/CharUnits.h"
+#include "clang/AST/Expr.h"
#include "clang/AST/ParentMapContext.h"
+#include "clang/AST/Stmt.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/Analysis/ProgramPoint.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Checkers/Taint.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
@@ -22,7 +29,7 @@
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
-#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>
@@ -32,6 +39,131 @@ using namespace ento;
using namespace taint;
using llvm::formatv;
+namespace {
+
+class SuppressReportsHavingWeakLoopAssumption : public BugReporterVisitor {
+private:
+ friend class BugReporterVisitor;
+ llvm::DenseSet<const Stmt *> LoopsSeen;
+ llvm::DenseMap<const Expr *, unsigned> EagerlyAssumedTrue;
+ llvm::DenseMap<const Expr *, unsigned> EagerlyAssumedFalse;
+
+ void Profile(llvm::FoldingSetNodeID &ID) const final {
+ static const bool Tag = 0;
+ ID.AddPointer(&Tag);
+ }
+
+ static bool isLoopAndNonNull(const Stmt *S) {
+ return isa_and_nonnull<ForStmt, WhileStmt, CXXForRangeStmt, DoStmt>(S);
+ }
+
+ void registerLoopStatements(const ExplodedNode *N) {
+ if (auto Entrance = N->getLocation().getAs<BlockEntrance>()) {
+ const Stmt *TermStmt = Entrance->getBlock()->getTerminatorStmt();
+ if (isLoopAndNonNull(TermStmt))
+ LoopsSeen.insert(TermStmt);
+ }
+ }
+
+ void registerOccurrence(llvm::DenseMap<const Expr *, unsigned> &Map,
+ const Expr *Key) {
+ if (auto [Place, Inserted] = Map.try_emplace(Key, 1); !Inserted)
+ ++Place->second;
+ }
+
+ PathDiagnosticPieceRef VisitNode(const ExplodedNode *Succ,
+ BugReporterContext &BRC,
+ PathSensitiveBugReport &BR) final {
+ registerLoopStatements(Succ);
+
+ auto AsPostStmt = Succ->getLocationAs<PostStmt>();
+ const Expr *CurrExpr =
+ AsPostStmt ? dyn_cast<Expr>(AsPostStmt->getStmt()) : nullptr;
+
+ const auto *Tag = Succ->getLocation().getTag();
+ if (!Tag)
+ return nullptr;
+
+ StringRef TagDesc = Tag->getTagDescription();
+ if (TagDesc == "ExprEngine : Eagerly Assume True")
+ registerOccurrence(EagerlyAssumedTrue, CurrExpr);
+ if (TagDesc == "ExprEngine : Eagerly Assume False")
+ registerOccurrence(EagerlyAssumedFalse, CurrExpr);
+ return nullptr;
+ }
+
+ static const Expr *getCond(const Stmt *S) {
+ assert(isLoopAndNonNull(S));
+ switch (S->getStmtClass()) {
+ case Stmt::StmtClass::ForStmtClass:
+ return cast<ForStmt>(S)->getCond();
+ case Stmt::StmtClass::WhileStmtClass:
+ return cast<WhileStmt>(S)->getCond();
+ case Stmt::StmtClass::CXXForRangeStmtClass:
+ return cast<CXXForRangeStmt>(S)->getCond();
+ case Stmt::StmtClass::DoStmtClass:
+ return cast<DoStmt>(S)->getCond();
+ default:
+ return nullptr;
+ }
+ }
+
+ void trySuppressReport(PathSensitiveBugReport &R, const Stmt *Loop,
+ const Expr *CondSubExpr) const {
+ auto NumEagerlyTrue = EagerlyAssumedTrue.lookup(CondSubExpr);
+ auto NumEagerlyFalse = EagerlyAssumedFalse.lookup(CondSubExpr);
+
+ // Suppress the report if it avoided a loop with an eager assumption.
+ if (NumEagerlyTrue == 0 && NumEagerlyFalse == 1) {
+ R.markInvalid("eagerly decided never taking the loop body", CondSubExpr);
+ return;
+ }
+
+ // Account for do-while loops where the body is already "taken"
+ // unconditionally for the first round.
+ if (isa<DoStmt>(Loop)) {
+ // Suppress the report if we have taken the loop body for the second time
+ // with an eager assumption.
+ if (NumEagerlyTrue > 1 && NumEagerlyFalse == 0) {
+ R.markInvalid("eagerly taken the do-while body at least 2 times",
+ CondSubExpr);
+ }
+ return;
+ }
+
+ // Suppress the report if it iterates with eager assumptions 3 or more
+ // times.
+ if (NumEagerlyTrue > 2 && NumEagerlyFalse == 0) {
+ R.markInvalid("eagerly taken the loop body at least 2 times",
+ CondSubExpr);
+ }
+ }
+
+ void finalizeVisitor(BugReporterContext &BRC, const ExplodedNode *,
+ PathSensitiveBugReport &R) final {
+ ASTContext &Ctx = BRC.getASTContext();
+
+ // Go over all the loops we have seen (either avoided or entered), and check
+ // if the condition of such loops were eagerly decided.
+ for (const Stmt *Loop : LoopsSeen) {
+ if (const Expr *Cond = getCond(Loop)) {
+ // Let's try all sub-exprs to cover cases that use short-circuiting.
+ // We need to check if any sub-expression of the condition was eagerly
+ // decided, because the '&&' and '||' logical operators could
+ // short-circuit, thus the expression on which we recorded the
+ // "eager decision" was a sub-expression of the Loop condition.
+ using namespace ast_matchers;
+ for (auto Binding : match(findAll(expr().bind("e")), *Cond, Ctx)) {
+ trySuppressReport(R, Loop, Binding.getNodeAs<Expr>("e"));
+ if (!R.isValid())
+ return;
+ }
+ }
+ }
+ }
+};
+} // namespace
+
namespace {
/// If `E` is a "clean" array subscript expression, return the type of the
/// accessed element. If the base of the subscript expression is modified by
@@ -722,6 +854,7 @@ void ArrayBoundCheckerV2::reportOOB(CheckerContext &C,
if (Extent)
markPartsInteresting(*BR, ErrorState, *Extent, IsTaintBug);
+ BR->addVisitor<SuppressReportsHavingWeakLoopAssumption>();
C.emitReport(std::move(BR));
}
>From 6e6ed6fabe7326f2429f84554c2a95e99127d944 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Sat, 5 Oct 2024 16:59:45 +0200
Subject: [PATCH 4/5] Implement without using eagerly-assume tags
---
.../Checkers/ArrayBoundCheckerV2.cpp | 82 +++++++++++++------
clang/test/Analysis/out-of-bounds.c | 4 +-
2 files changed, 58 insertions(+), 28 deletions(-)
diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
index 3fcdbc6de6a068..aafd44d037273d 100644
--- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -28,8 +28,11 @@
#include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>
@@ -42,17 +45,36 @@ using llvm::formatv;
namespace {
class SuppressReportsHavingWeakLoopAssumption : public BugReporterVisitor {
+public:
+ SuppressReportsHavingWeakLoopAssumption(const PathSensitiveBugReport &R)
+ : OrigCursor(R.getErrorNode()) {}
+
private:
friend class BugReporterVisitor;
+
+ // The exploded node we are currently visiting, but in the original exploded
+ // graph. This graph is not trimmed, unlike the one we usually visit.
+ const ExplodedNode *OrigCursor;
+
llvm::DenseSet<const Stmt *> LoopsSeen;
- llvm::DenseMap<const Expr *, unsigned> EagerlyAssumedTrue;
- llvm::DenseMap<const Expr *, unsigned> EagerlyAssumedFalse;
+ llvm::DenseMap<const Expr *, unsigned> NumTimesTakenTrueDecision;
+ llvm::DenseMap<const Expr *, unsigned> NumTimesTakenFalseDecision;
void Profile(llvm::FoldingSetNodeID &ID) const final {
static const bool Tag = 0;
ID.AddPointer(&Tag);
}
+ static const ExplodedNode *selectMatchingPred(const ExplodedNode *OrigSucc,
+ unsigned SoughtPredId) {
+ auto MatchesSoughtId = [SoughtPredId](const ExplodedNode *N) {
+ return N->getID() == SoughtPredId;
+ };
+ auto It = llvm::find_if(OrigSucc->preds(), MatchesSoughtId);
+ assert(It != OrigSucc->pred_end());
+ return *It;
+ }
+
static bool isLoopAndNonNull(const Stmt *S) {
return isa_and_nonnull<ForStmt, WhileStmt, CXXForRangeStmt, DoStmt>(S);
}
@@ -74,21 +96,33 @@ class SuppressReportsHavingWeakLoopAssumption : public BugReporterVisitor {
PathDiagnosticPieceRef VisitNode(const ExplodedNode *Succ,
BugReporterContext &BRC,
PathSensitiveBugReport &BR) final {
+ OrigCursor = selectMatchingPred(OrigCursor, Succ->getID());
+ assert(OrigCursor->getID() == Succ->getID());
+
registerLoopStatements(Succ);
auto AsPostStmt = Succ->getLocationAs<PostStmt>();
const Expr *CurrExpr =
AsPostStmt ? dyn_cast<Expr>(AsPostStmt->getStmt()) : nullptr;
- const auto *Tag = Succ->getLocation().getTag();
- if (!Tag)
+ bool IsSplittingTwoWays = OrigCursor->succ_size() == 2;
+
+ if (!CurrExpr || !IsSplittingTwoWays)
return nullptr;
- StringRef TagDesc = Tag->getTagDescription();
- if (TagDesc == "ExprEngine : Eagerly Assume True")
- registerOccurrence(EagerlyAssumedTrue, CurrExpr);
- if (TagDesc == "ExprEngine : Eagerly Assume False")
- registerOccurrence(EagerlyAssumedFalse, CurrExpr);
+ const ExplodedNode *Next = Succ->getFirstSucc();
+ SValBuilder &SVB = Succ->getState()->getStateManager().getSValBuilder();
+ auto Query = SVB.evalCast(Succ->getSVal(CurrExpr), SVB.getConditionType(),
+ CurrExpr->getType())
+ .getAs<DefinedOrUnknownSVal>();
+ if (!Query)
+ return nullptr;
+
+ ProgramStateRef TakenTrueBranch = Next->getState()->assume(*Query, true);
+ registerOccurrence(TakenTrueBranch ? NumTimesTakenTrueDecision
+ : NumTimesTakenFalseDecision,
+ CurrExpr);
+
return nullptr;
}
@@ -110,30 +144,26 @@ class SuppressReportsHavingWeakLoopAssumption : public BugReporterVisitor {
void trySuppressReport(PathSensitiveBugReport &R, const Stmt *Loop,
const Expr *CondSubExpr) const {
- auto NumEagerlyTrue = EagerlyAssumedTrue.lookup(CondSubExpr);
- auto NumEagerlyFalse = EagerlyAssumedFalse.lookup(CondSubExpr);
+ unsigned NumTimesTakenLoopBody =
+ NumTimesTakenTrueDecision.lookup(CondSubExpr);
+ unsigned NumTimesNotTakenLoopBody =
+ NumTimesTakenFalseDecision.lookup(CondSubExpr);
+
+ // Adjust for do-while loops, where the loop body is always taken.
+ if (isa<DoStmt>(Loop))
+ NumTimesTakenLoopBody += 1;
// Suppress the report if it avoided a loop with an eager assumption.
- if (NumEagerlyTrue == 0 && NumEagerlyFalse == 1) {
+ if (NumTimesTakenLoopBody == 0 && NumTimesNotTakenLoopBody == 1) {
+ // llvm::errs() << "eagerly decided never taking the loop body\n";
R.markInvalid("eagerly decided never taking the loop body", CondSubExpr);
return;
}
- // Account for do-while loops where the body is already "taken"
- // unconditionally for the first round.
- if (isa<DoStmt>(Loop)) {
- // Suppress the report if we have taken the loop body for the second time
- // with an eager assumption.
- if (NumEagerlyTrue > 1 && NumEagerlyFalse == 0) {
- R.markInvalid("eagerly taken the do-while body at least 2 times",
- CondSubExpr);
- }
- return;
- }
-
// Suppress the report if it iterates with eager assumptions 3 or more
// times.
- if (NumEagerlyTrue > 2 && NumEagerlyFalse == 0) {
+ if (NumTimesTakenLoopBody > 2 && NumTimesNotTakenLoopBody == 0) {
+ // llvm::errs() << "eagerly taken the loop body at least 2 times\n";
R.markInvalid("eagerly taken the loop body at least 2 times",
CondSubExpr);
}
@@ -854,7 +884,7 @@ void ArrayBoundCheckerV2::reportOOB(CheckerContext &C,
if (Extent)
markPartsInteresting(*BR, ErrorState, *Extent, IsTaintBug);
- BR->addVisitor<SuppressReportsHavingWeakLoopAssumption>();
+ BR->addVisitor<SuppressReportsHavingWeakLoopAssumption>(*BR);
C.emitReport(std::move(BR));
}
diff --git a/clang/test/Analysis/out-of-bounds.c b/clang/test/Analysis/out-of-bounds.c
index fde53d148ba442..1b62b2eed4ee84 100644
--- a/clang/test/Analysis/out-of-bounds.c
+++ b/clang/test/Analysis/out-of-bounds.c
@@ -279,7 +279,7 @@ void loop_suppress_in_third_iteration_logical_and(int len, int flag) {
// short-circuiting operator '&&'.
// I have seen a real-world FP that looks like this, but it is much rarer
// than the basic setup.
- buf[i] = 1; // expected-warning{{Out of bound access to memory}}
+ buf[i] = 1; // no-warning
}
}
@@ -311,7 +311,7 @@ void loop_suppress_in_third_iteration_cast(int len) {
// assumption. There are already many differences between analysis with and
// without eager assumptions, so it would be pointless to write more
// complicated code to eliminate these rare differences.
- buf[i] = 1; // eagerlyassume-warning{{Out of bound access to memory}}
+ buf[i] = 1; // no-warning
}
}
>From 26f02f3f5824d0c78132e46ace2b3d41f8dd68e2 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Sat, 5 Oct 2024 16:59:55 +0200
Subject: [PATCH 5/5] Revert "Use the eager assumption tags only if the
decision was ambiguous"
This reverts commit c429df63af6cdeec7e721e8f9c322f1ac6ba3b3d.
---
clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 15 ++++--------
clang/test/Analysis/assuming-unsigned-ge-0.c | 5 +++-
clang/test/Analysis/cast-value-notes.cpp | 20 +++++++++-------
clang/test/Analysis/cast-value-state-dump.cpp | 5 ++--
.../std-c-library-functions-arg-constraints.c | 24 ++++++++++++-------
5 files changed, 40 insertions(+), 29 deletions(-)
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 62066632db87d0..b1919d7027cf4d 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -3771,29 +3771,24 @@ void ExprEngine::evalEagerlyAssumeBinOpBifurcation(ExplodedNodeSet &Dst,
SVal V = state->getSVal(Ex, Pred->getLocationContext());
std::optional<nonloc::SymbolVal> SEV = V.getAs<nonloc::SymbolVal>();
if (SEV && SEV->isExpression()) {
+ const std::pair<const ProgramPointTag *, const ProgramPointTag*> &tags =
+ geteagerlyAssumeBinOpBifurcationTags();
+
ProgramStateRef StateTrue, StateFalse;
std::tie(StateTrue, StateFalse) = state->assume(*SEV);
- const ProgramPointTag *EagerTrueTag = nullptr;
- const ProgramPointTag *EagerFalseTag = nullptr;
-
- // Use the eager assumption tags if the choice was ambiguous.
- if (StateTrue && StateFalse)
- std::tie(EagerTrueTag, EagerFalseTag) =
- geteagerlyAssumeBinOpBifurcationTags();
-
// First assume that the condition is true.
if (StateTrue) {
SVal Val = svalBuilder.makeIntVal(1U, Ex->getType());
StateTrue = StateTrue->BindExpr(Ex, Pred->getLocationContext(), Val);
- Bldr.generateNode(Ex, Pred, StateTrue, EagerTrueTag);
+ Bldr.generateNode(Ex, Pred, StateTrue, tags.first);
}
// Next, assume that the condition is false.
if (StateFalse) {
SVal Val = svalBuilder.makeIntVal(0U, Ex->getType());
StateFalse = StateFalse->BindExpr(Ex, Pred->getLocationContext(), Val);
- Bldr.generateNode(Ex, Pred, StateFalse, EagerFalseTag);
+ Bldr.generateNode(Ex, Pred, StateFalse, tags.second);
}
}
}
diff --git a/clang/test/Analysis/assuming-unsigned-ge-0.c b/clang/test/Analysis/assuming-unsigned-ge-0.c
index 18e637a6e5310e..553e68cb96c6bd 100644
--- a/clang/test/Analysis/assuming-unsigned-ge-0.c
+++ b/clang/test/Analysis/assuming-unsigned-ge-0.c
@@ -2,7 +2,10 @@
// RUN: -analyzer-checker=core -verify %s
int assuming_unsigned_ge_0(unsigned arg) {
- // expected-note at +2 {{'arg' is >= 0}}
+ // TODO This testcase demonstrates the current incorrect behavior of Clang
+ // Static Analyzer: here 'arg' is unsigned, so "arg >= 0" is not a fresh
+ // assumption, but it still appears in the diagnostics as if it's fresh:
+ // expected-note at +2 {{Assuming 'arg' is >= 0}}
// expected-note at +1 {{Taking false branch}}
if (arg < 0)
return 0;
diff --git a/clang/test/Analysis/cast-value-notes.cpp b/clang/test/Analysis/cast-value-notes.cpp
index 0235404e73adf6..7ee224dc6e5d8f 100644
--- a/clang/test/Analysis/cast-value-notes.cpp
+++ b/clang/test/Analysis/cast-value-notes.cpp
@@ -168,8 +168,9 @@ void evalNonNullParamNonNullReturnReference(const Shape &S) {
// expected-note at -2 {{Taking true branch}}
(void)(1 / !C);
- // expected-note at -1 {{Division by zero}}
- // expected-warning at -2 {{Division by zero}}
+ // expected-note at -1 {{'C' is non-null}}
+ // expected-note at -2 {{Division by zero}}
+ // expected-warning at -3 {{Division by zero}}
}
}
@@ -225,8 +226,9 @@ void evalNonNullParamNonNullReturn(const Shape *S) {
// expected-note at -2 {{Taking true branch}}
(void)(1 / !C);
- // expected-note at -1 {{Division by zero}}
- // expected-warning at -2 {{Division by zero}}
+ // expected-note at -1 {{'C' is non-null}}
+ // expected-note at -2 {{Division by zero}}
+ // expected-warning at -3 {{Division by zero}}
}
}
@@ -241,8 +243,9 @@ void evalNonNullParamNullReturn(const Shape *S) {
// expected-note at -4 {{Taking true branch}}
(void)(1 / !T);
- // expected-note at -1 {{Division by zero}}
- // expected-warning at -2 {{Division by zero}}
+ // expected-note at -1 {{'T' is non-null}}
+ // expected-note at -2 {{Division by zero}}
+ // expected-warning at -3 {{Division by zero}}
}
}
@@ -262,8 +265,9 @@ void evalZeroParamNonNullReturnPointer(const Shape *S) {
// expected-note at -2 {{'C' initialized here}}
(void)(1 / !C);
- // expected-note at -1 {{Division by zero}}
- // expected-warning at -2 {{Division by zero}}
+ // expected-note at -1 {{'C' is non-null}}
+ // expected-note at -2 {{Division by zero}}
+ // expected-warning at -3 {{Division by zero}}
}
void evalZeroParamNonNullReturn(const Shape &S) {
diff --git a/clang/test/Analysis/cast-value-state-dump.cpp b/clang/test/Analysis/cast-value-state-dump.cpp
index 97f3b2236db087..07fd7abd848ab7 100644
--- a/clang/test/Analysis/cast-value-state-dump.cpp
+++ b/clang/test/Analysis/cast-value-state-dump.cpp
@@ -40,7 +40,8 @@ void evalNonNullParamNonNullReturn(const Shape *S) {
// CHECK-NEXT: ] }
(void)(1 / !C);
- // expected-note at -1 {{Division by zero}}
- // expected-warning at -2 {{Division by zero}}
+ // expected-note at -1 {{'C' is non-null}}
+ // expected-note at -2 {{Division by zero}}
+ // expected-warning at -3 {{Division by zero}}
}
diff --git a/clang/test/Analysis/std-c-library-functions-arg-constraints.c b/clang/test/Analysis/std-c-library-functions-arg-constraints.c
index a6e0ad15129f60..0b817dda98c727 100644
--- a/clang/test/Analysis/std-c-library-functions-arg-constraints.c
+++ b/clang/test/Analysis/std-c-library-functions-arg-constraints.c
@@ -42,7 +42,8 @@ void test_alnum_symbolic(int x) {
// report-warning{{TRUE}} \
// bugpath-warning{{TRUE}} \
// bugpath-note{{TRUE}} \
- // bugpath-note{{Left side of '&&' is true}}
+ // bugpath-note{{Left side of '&&' is true}} \
+ // bugpath-note{{'x' is <= 255}}
}
void test_alnum_symbolic2(int x) {
@@ -75,7 +76,8 @@ void test_toupper_symbolic(int x) {
// report-warning{{TRUE}} \
// bugpath-warning{{TRUE}} \
// bugpath-note{{TRUE}} \
- // bugpath-note{{Left side of '&&' is true}}
+ // bugpath-note{{Left side of '&&' is true}} \
+ // bugpath-note{{'x' is <= 255}}
}
void test_toupper_symbolic2(int x) {
@@ -108,7 +110,8 @@ void test_tolower_symbolic(int x) {
// report-warning{{TRUE}} \
// bugpath-warning{{TRUE}} \
// bugpath-note{{TRUE}} \
- // bugpath-note{{Left side of '&&' is true}}
+ // bugpath-note{{Left side of '&&' is true}} \
+ // bugpath-note{{'x' is <= 255}}
}
void test_tolower_symbolic2(int x) {
@@ -141,7 +144,8 @@ void test_toascii_symbolic(int x) {
// report-warning{{TRUE}} \
// bugpath-warning{{TRUE}} \
// bugpath-note{{TRUE}} \
- // bugpath-note{{Left side of '&&' is true}}
+ // bugpath-note{{Left side of '&&' is true}} \
+ // bugpath-note{{'x' is <= 255}}
}
void test_toascii_symbolic2(int x) {
@@ -169,7 +173,8 @@ void test_notnull_symbolic(FILE *fp, int *buf) {
clang_analyzer_eval(buf != 0); // \
// report-warning{{TRUE}} \
// bugpath-warning{{TRUE}} \
- // bugpath-note{{TRUE}}
+ // bugpath-note{{TRUE}} \
+ // bugpath-note{{'buf' is not equal to null}}
}
void test_notnull_symbolic2(FILE *fp, int *buf) {
if (!buf) // bugpath-note{{Assuming 'buf' is null}} \
@@ -213,7 +218,8 @@ void test_notnull_buffer_3(void *buf) {
clang_analyzer_eval(buf != 0); // \
// report-warning{{TRUE}} \
// bugpath-warning{{TRUE}} \
- // bugpath-note{{TRUE}}
+ // bugpath-note{{TRUE}} \
+ // bugpath-note{{'buf' is not equal to null}}
}
void test_no_node_after_bug(FILE *fp, size_t size, size_t n, void *buf) {
@@ -293,7 +299,8 @@ void test_buf_size_symbolic(int s) {
clang_analyzer_eval(s <= 3); // \
// report-warning{{TRUE}} \
// bugpath-warning{{TRUE}} \
- // bugpath-note{{TRUE}}
+ // bugpath-note{{TRUE}} \
+ // bugpath-note{{'s' is <= 3}}
}
void test_buf_size_symbolic_and_offset(int s) {
char buf[3];
@@ -301,7 +308,8 @@ void test_buf_size_symbolic_and_offset(int s) {
clang_analyzer_eval(s <= 2); // \
// report-warning{{TRUE}} \
// bugpath-warning{{TRUE}} \
- // bugpath-note{{TRUE}}
+ // bugpath-note{{TRUE}} \
+ // bugpath-note{{'s' is <= 2}}
}
int __buf_size_arg_constraint_mul(const void *, size_t, size_t);
More information about the cfe-commits
mailing list