[clang] edbc1fb - [analyzer] Add option assume-at-least-one-iteration (#125494)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Feb 12 02:56:09 PST 2025
Author: DonĂ¡t Nagy
Date: 2025-02-12T11:56:02+01:00
New Revision: edbc1fb228acd7746d91cd72d476c063704504b8
URL: https://github.com/llvm/llvm-project/commit/edbc1fb228acd7746d91cd72d476c063704504b8
DIFF: https://github.com/llvm/llvm-project/commit/edbc1fb228acd7746d91cd72d476c063704504b8.diff
LOG: [analyzer] Add option assume-at-least-one-iteration (#125494)
This commit adds the new analyzer option
`assume-at-least-one-iteration`, which is `false` by default, but can be
set to `true` to ensure that the analyzer always assumes at least one
iteration in loops.
In some situations this "loop is skipped" execution path is an important
corner case that may evade the notice of the developer and hide
significant bugs -- however, there are also many situations where it's
guaranteed that at least one iteration will happen (e.g. some data
structure is always nonempty), but the analyzer cannot realize this and
will produce false positives when it assumes that the loop is skipped.
This commit refactors some logic around the implementation of the new
feature, but the only functional change is introducing the new analyzer
option. If the new option is left in its default state (false), then the
analysis is functionally equivalent to an analysis done with a version
before this commit.
Added:
Modified:
clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
clang/test/Analysis/analyzer-config.c
clang/test/Analysis/loop-assumptions.c
Removed:
################################################################################
diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
index 34bb7a809162b..a9b8d0753673b 100644
--- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
+++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
@@ -294,6 +294,16 @@ ANALYZER_OPTION(
bool, ShouldUnrollLoops, "unroll-loops",
"Whether the analysis should try to unroll loops with known bounds.", false)
+ANALYZER_OPTION(
+ bool, ShouldAssumeAtLeastOneIteration, "assume-at-least-one-iteration",
+ "Whether the analyzer should always assume at least one iteration in "
+ "loops where the loop condition is opaque (i.e. the analyzer cannot "
+ "determine if it's true or false). Setting this to true eliminates some "
+ "false positives (where e.g. a structure is nonempty, but the analyzer "
+ "does not notice this); but it also eliminates some true positives (e.g. "
+ "cases where a structure can be empty and this causes buggy behavior).",
+ false)
+
ANALYZER_OPTION(
bool, ShouldDisplayNotesAsEvents, "notes-as-events",
"Whether the bug reporter should transparently treat extra note diagnostic "
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index e3ec7c57571c8..c799687ccb44f 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -2813,13 +2813,24 @@ void ExprEngine::processBranch(
if (StTrue && StFalse)
assert(!isa<ObjCForCollectionStmt>(Condition));
+ // We want to ensure consistent behavior between `eagerly-assume=false`,
+ // when the state split is always performed by the `assumeCondition()`
+ // call within this function and `eagerly-assume=true` (the default), when
+ // some conditions (comparison operators, unary negation) can trigger a
+ // state split before this callback. There are some contrived corner cases
+ // that behave
diff erently with and without `eagerly-assume`, but I don't
+ // know about an example that could plausibly appear in "real" code.
+ bool BothFeasible =
+ (StTrue && StFalse) ||
+ didEagerlyAssumeBifurcateAt(PrevState, dyn_cast<Expr>(Condition));
+
if (StTrue) {
- // If we are processing a loop condition where two iterations have
- // already been completed and the false branch is also feasible, then
- // don't assume a third iteration because it is a redundant execution
- // path (unlikely to be
diff erent from earlier loop exits) and can cause
- // false positives if e.g. the loop iterates over a two-element structure
- // with an opaque condition.
+ // In a loop, if both branches are feasible (i.e. the analyzer doesn't
+ // understand the loop condition) and two iterations have already been
+ // completed, then don't assume a third iteration because it is a
+ // redundant execution path (unlikely to be
diff erent from earlier loop
+ // exits) and can cause false positives if e.g. the loop iterates over a
+ // two-element structure with an opaque condition.
//
// The iteration count "2" is hardcoded because it's the natural limit:
// * the fact that the programmer wrote a loop (and not just an `if`)
@@ -2830,10 +2841,7 @@ void ExprEngine::processBranch(
// two iterations". (This pattern is common in FFMPEG and appears in
// many other projects as well.)
bool CompletedTwoIterations = IterationsCompletedInLoop.value_or(0) >= 2;
- bool FalseAlsoFeasible =
- StFalse ||
- didEagerlyAssumeBifurcateAt(PrevState, dyn_cast<Expr>(Condition));
- bool SkipTrueBranch = CompletedTwoIterations && FalseAlsoFeasible;
+ bool SkipTrueBranch = BothFeasible && CompletedTwoIterations;
// FIXME: This "don't assume third iteration" heuristic partially
// conflicts with the widen-loop analysis option (which is off by
@@ -2843,8 +2851,25 @@ void ExprEngine::processBranch(
Builder.generateNode(StTrue, true, PredN);
}
- if (StFalse)
- Builder.generateNode(StFalse, false, PredN);
+ if (StFalse) {
+ // In a loop, if both branches are feasible (i.e. the analyzer doesn't
+ // understand the loop condition), we are before the first iteration and
+ // the analyzer option `assume-at-least-one-iteration` is set to `true`,
+ // then avoid creating the execution path where the loop is skipped.
+ //
+ // In some situations this "loop is skipped" execution path is an
+ // important corner case that may evade the notice of the developer and
+ // hide significant bugs -- however, there are also many situations where
+ // it's guaranteed that at least one iteration will happen (e.g. some
+ // data structure is always nonempty), but the analyzer cannot realize
+ // this and will produce false positives when it assumes that the loop is
+ // skipped.
+ bool BeforeFirstIteration = IterationsCompletedInLoop == std::optional{0};
+ bool SkipFalseBranch = BothFeasible && BeforeFirstIteration &&
+ AMgr.options.ShouldAssumeAtLeastOneIteration;
+ if (!SkipFalseBranch)
+ Builder.generateNode(StFalse, false, PredN);
+ }
}
currBldrCtx = nullptr;
}
diff --git a/clang/test/Analysis/analyzer-config.c b/clang/test/Analysis/analyzer-config.c
index d5eb790b82f23..f6a49680917ac 100644
--- a/clang/test/Analysis/analyzer-config.c
+++ b/clang/test/Analysis/analyzer-config.c
@@ -10,6 +10,7 @@
// CHECK-NEXT: alpha.cplusplus.STLAlgorithmModeling:AggressiveStdFindModeling = false
// CHECK-NEXT: alpha.osx.cocoa.DirectIvarAssignment:AnnotatedFunctions = false
// CHECK-NEXT: apply-fixits = false
+// CHECK-NEXT: assume-at-least-one-iteration = false
// CHECK-NEXT: assume-controlled-environment = false
// CHECK-NEXT: avoid-suppressing-null-argument-paths = false
// CHECK-NEXT: c++-allocator-inlining = true
diff --git a/clang/test/Analysis/loop-assumptions.c b/clang/test/Analysis/loop-assumptions.c
index eb0ffdce722e0..b61ed8815e3f6 100644
--- a/clang/test/Analysis/loop-assumptions.c
+++ b/clang/test/Analysis/loop-assumptions.c
@@ -1,25 +1,48 @@
// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection \
-// RUN: -verify=expected,eagerlyassume %s
+// RUN: -verify=expected,noassumeone,eagerlyassume,combo %s
// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection \
// RUN: -analyzer-config eagerly-assume=false \
+// RUN: -verify=expected,noassumeone,noeagerlyassume,combo %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection \
+// RUN: -analyzer-config assume-at-least-one-iteration=true \
+// RUN: -verify=expected,eagerlyassume,combo %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection \
+// RUN: -analyzer-config assume-at-least-one-iteration=true,eagerly-assume=false \
// RUN: -verify=expected,noeagerlyassume %s
+// The verify tag "combo" is used for one unique warning which is produced in three
+// of the four RUN combinations.
+
// These tests validate the logic within `ExprEngine::processBranch` which
// ensures that in loops with opaque conditions we don't assume execution paths
// if the code does not imply that they are possible.
+// In particular, if two (or more) iterations are already completed in a loop,
+// we don't assume that there can be another iteration. Moreover, if the
+// analyzer option `assume-at-least-one-iteration` is enabled, then we don't
+// assume that a loop can be skipped completely.
void clang_analyzer_numTimesReached(void);
-void clang_analyzer_warnIfReached(void);
void clang_analyzer_dump(int);
-void clearCondition(void) {
- // If the analyzer can definitely determine the value of the loop condition,
+void clearTrueCondition(void) {
+ // If the analyzer can definitely determine that the loop condition is true,
// then this corrective logic doesn't activate and the engine executes
// `-analyzer-max-loop` iterations (by default, 4).
- for (int i = 0; i < 10; i++)
+ int i;
+ for (i = 0; i < 10; i++)
clang_analyzer_numTimesReached(); // expected-warning {{4}}
- clang_analyzer_warnIfReached(); // unreachable
+ clang_analyzer_dump(i); // Unreachable, no reports.
+}
+
+void clearFalseCondition(void) {
+ // If the analyzer can definitely determine that the loop condition is false,
+ // then the loop is skipped, even in `assume-at-least-one-iteration` mode.
+ int i;
+ for (i = 0; i > 10; i++)
+ clang_analyzer_numTimesReached(); // Unreachable, no report.
+
+ clang_analyzer_dump(i); // expected-warning {{0}}
}
void opaqueCondition(int arg) {
@@ -28,10 +51,13 @@ void opaqueCondition(int arg) {
// that more than two iterations are possible. (It _does_ imply that two
// iterations may be possible at least in some cases, because otherwise an
// `if` would've been enough.)
- for (int i = 0; i < arg; i++)
+ // Moreover, if `assume-at-least-one-iteration` is enabled, then assume at
+ // least one iteration.
+ int i;
+ for (i = 0; i < arg; i++)
clang_analyzer_numTimesReached(); // expected-warning {{2}}
- clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+ clang_analyzer_dump(i); // noassumeone-warning {{0}} expected-warning {{1}} expected-warning {{2}}
}
int check(void);
@@ -42,22 +68,26 @@ void opaqueConditionCall(int arg) {
// insert an assertion to guide the analyzer and rule out more than two
// iterations (so the analyzer needs to proactively avoid those unjustified
// branches).
- while (check())
+ int i = 0; // Helper to distinguish the the branches after the loop.
+ while (check()) {
clang_analyzer_numTimesReached(); // expected-warning {{2}}
+ i++;
+ }
- clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+ clang_analyzer_dump(i); // noassumeone-warning {{0}} expected-warning {{1}} expected-warning {{2}}
}
void opaqueConditionDoWhile(int arg) {
// Same situation as `opaqueCondition()` but with a `do {} while ()` loop.
// This is tested separately because this loop type is a special case in the
// iteration count calculation.
+ // Obviously, this loop guarantees that at least one iteration will happen.
int i = 0;
do {
clang_analyzer_numTimesReached(); // expected-warning {{2}}
} while (i++ < arg);
- clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+ clang_analyzer_dump(i); // expected-warning {{1}} expected-warning {{2}}
}
void dontRememberOldBifurcation(int arg) {
@@ -69,7 +99,7 @@ void dontRememberOldBifurcation(int arg) {
// by default), because the code remembered that there was a bifurcation on
// the first iteration of the loop and didn't realize that this is obsolete.
- // NOTE: The variable `i` is introduced to ensure that the iterations of the
+ // NOTE: The variable `i` is significant to ensure that the iterations of the
// loop change the state -- otherwise the analyzer stops iterating because it
// returns to the same `ExplodedNode`.
int i = 0;
@@ -78,10 +108,12 @@ void dontRememberOldBifurcation(int arg) {
i++;
}
- clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+ clang_analyzer_dump(i); // noassumeone-warning {{0}}
}
void dontAssumeFourthIterartion(int arg) {
+ int i;
+
if (arg == 2)
return;
@@ -89,10 +121,10 @@ void dontAssumeFourthIterartion(int arg) {
// iterations (because it knows that `arg != 2` at that point), so it
// performs a third iteration, but it does not assume that a fourth iteration
// is also possible.
- for (int i = 0; i < arg; i++)
+ for (i = 0; i < arg; i++)
clang_analyzer_numTimesReached(); // expected-warning {{3}}
- clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+ clang_analyzer_dump(i); // noassumeone-warning {{0}} expected-warning {{1}} expected-warning {{3}}
}
#define TRUE 1
@@ -108,42 +140,53 @@ void shortCircuitInLoopCondition(int arg) {
// false positive on the ffmpeg codebase. Eventually we should properly
// recognize the full syntactical loop condition expression as "the loop
// condition", but this will be complicated to implement.
- for (int i = 0; i < arg && TRUE; i++) {
+ int i;
+ for (i = 0; i < arg && TRUE; i++) {
clang_analyzer_numTimesReached(); // expected-warning {{4}}
}
- clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+ clang_analyzer_dump(i); // expected-warning {{0}} expected-warning {{1}} expected-warning {{2}} expected-warning {{3}}
}
void shortCircuitInLoopConditionRHS(int arg) {
// Unlike `shortCircuitInLoopCondition()`, this case is handled properly
// because the analyzer thinks that the right hand side of the `&&` is the
// loop condition.
- for (int i = 0; TRUE && i < arg; i++) {
+ int i;
+ for (i = 0; TRUE && i < arg; i++) {
clang_analyzer_numTimesReached(); // expected-warning {{2}}
}
- clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+ clang_analyzer_dump(i); // noassumeone-warning {{0}} expected-warning {{1}} expected-warning {{2}}
}
void eagerlyAssumeInSubexpression(int arg) {
// The `EagerlyAssume` logic is another complication that can "split the
// state" within the loop condition, but before the `processBranch()` call
- // which is (in theory) responsible for evaluating the loop condition.
- // The current implementation partially compensates this by noticing the
+ // which would be "naturally" responsible for evaluating the loop condition.
+ // The current implementation tries to handle this by noticing the
// cases where the loop condition is targeted by `EagerlyAssume`, but does
// not handle the (fortunately rare) case when `EagerlyAssume` hits a
// sub-expression of the loop condition (as in this contrived test case).
- // FIXME: I don't know a real-world example for this inconsistency, but it
- // would be good to eliminate it eventually.
- for (int i = 0; (i >= arg) - 1; i++) {
+ // FIXME: It would be good to eventually eliminate this inconsistency, but
+ // I don't know a realistic example that could appear in real-world code, so
+ // this seems to be a low-priority goal.
+ int i;
+ for (i = 0; (i >= arg) - 1; i++) {
clang_analyzer_numTimesReached(); // eagerlyassume-warning {{4}} noeagerlyassume-warning {{2}}
}
- clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+
+ // The 'combo' note intentionally appears if `assume-at-least-one-iteration`
+ // is disabled, but also appears as a bug when `eagerly-assume` and
+ // `assume-at-least-one-iteration` are both enabled.
+ clang_analyzer_dump(i); // combo-warning {{0}} expected-warning {{1}} expected-warning {{2}} eagerlyassume-warning {{3}}
}
void calledTwice(int arg, int isFirstCall) {
// This function is called twice (with two
diff erent unknown 'arg' values) to
// check the iteration count handling in this situation.
- for (int i = 0; i < arg; i++) {
+ int i;
+ for (i = 0; i < arg; i++) {
if (isFirstCall) {
clang_analyzer_numTimesReached(); // expected-warning {{2}}
} else {
@@ -215,5 +258,5 @@ void onlyLoopConditions(int arg) {
break;
}
- clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}}
+ clang_analyzer_dump(i); // expected-warning {{1}} expected-warning {{2}} expected-warning {{3}} expected-warning {{4}}
}
More information about the cfe-commits
mailing list