[clang] ea8e328 - [analyzer][Z3] Restore the original timeout of 15s (#118291)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Dec 13 05:31:12 PST 2024
Author: Kristóf Umann
Date: 2024-12-13T14:31:06+01:00
New Revision: ea8e328ae2bea9d9a7d556ef4d791fa116f7de18
URL: https://github.com/llvm/llvm-project/commit/ea8e328ae2bea9d9a7d556ef4d791fa116f7de18
DIFF: https://github.com/llvm/llvm-project/commit/ea8e328ae2bea9d9a7d556ef4d791fa116f7de18.diff
LOG: [analyzer][Z3] Restore the original timeout of 15s (#118291)
Discussion here:
https://discourse.llvm.org/t/analyzer-rfc-taming-z3-query-times/79520/15?u=szelethus
The original patch, #97298 introduced new timeouts backed by thorough
testing and measurements to keep the running time of Z3 within
reasonable limits. The measurements also showed that only certain
reports and certain TUs were responsible for the poor performance of Z3
refutation.
Unfortunately, it seems like that on machines with different
characteristics (slower machines) the current timeouts don't just axe
0.01% of reports, but many more as well. Considering that timeouts are
inherently nondeterministic as a cutoff point, this lead reports sets
being vastly different on the same projects with the same configuration.
The discussion link shows that all configurations introduced in the
patch with their default values lead to severa nondeterminism of the
analyzer. As we, and others use the analyzer as a gating tool for PRs,
we should revert to the original defaults.
We should respect that
* There are still parts of the analyzer that are either proven or
suspected to contain nondeterministic code (like pointer sets),
* A 15s timeout is more likely to hit the same reports every time on a
wider range of machines, but is still inherently nondeterministic, but
an infinite timeout leads to the tool hanging,
* If you measure the performance of the analyzer on your machines, you
can and should achieve some speedup with little or no observable
nondeterminism.
---------
Co-authored-by: Balazs Benics <benicsbalazs at gmail.com>
Added:
Modified:
clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
clang/test/Analysis/analyzer-config.c
clang/unittests/StaticAnalyzer/Z3CrosscheckOracleTest.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
index ad2dbffe88326d..d8a7c755c95968 100644
--- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
+++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
@@ -189,20 +189,29 @@ ANALYZER_OPTION(
"crosscheck-with-z3-eqclass-timeout-threshold",
"Set a timeout for bug report equivalence classes in milliseconds. "
"If we exhaust this threshold, we will drop the bug report eqclass "
- "instead of doing more Z3 queries. Set 0 for no timeout.", 700)
+ "instead of doing more Z3 queries. Setting this to 700 ms in conjunction "
+ "with \"crosscheck-with-z3-timeout-threshold\" of 300 ms, would nicely "
+ "guarantee that no bug report equivalence class can take longer than "
+ "1 second, effectively mitigating Z3 hangs during refutation. "
+ "Set 0 for no timeout.", 0)
ANALYZER_OPTION(
unsigned, Z3CrosscheckTimeoutThreshold,
"crosscheck-with-z3-timeout-threshold",
"Set a timeout for individual Z3 queries in milliseconds. "
- "Set 0 for no timeout.", 300)
+ "On fast machines, 300 worked well in some cases. "
+ "The lower it is, the higher the chances of having flaky issues. "
+ "Having no timeout may hang the analyzer indefinitely. "
+ "Set 0 for no timeout.", 15'000)
ANALYZER_OPTION(
unsigned, Z3CrosscheckRLimitThreshold,
"crosscheck-with-z3-rlimit-threshold",
- "Set the Z3 resource limit threshold. This sets a deterministic cutoff "
- "point for Z3 queries, as longer queries usually consume more resources. "
- "Set 0 for unlimited.", 400'000)
+ "Set the Z3 resource limit threshold. This sets a supposedly deterministic "
+ "cutoff point for Z3 queries, as longer queries usually consume more "
+ "resources. "
+ "400'000 should on average make Z3 queries run for up to 100ms on modern "
+ "hardware. Set 0 for unlimited.", 0)
ANALYZER_OPTION(bool, ShouldReportIssuesInMainSourceFile,
"report-in-main-source-file",
diff --git a/clang/test/Analysis/analyzer-config.c b/clang/test/Analysis/analyzer-config.c
index 47594e8317bc79..8ce6184144d4b8 100644
--- a/clang/test/Analysis/analyzer-config.c
+++ b/clang/test/Analysis/analyzer-config.c
@@ -40,9 +40,9 @@
// CHECK-NEXT: cplusplus.Move:WarnOn = KnownsAndLocals
// CHECK-NEXT: cplusplus.SmartPtrModeling:ModelSmartPtrDereference = false
// CHECK-NEXT: crosscheck-with-z3 = false
-// CHECK-NEXT: crosscheck-with-z3-eqclass-timeout-threshold = 700
-// CHECK-NEXT: crosscheck-with-z3-rlimit-threshold = 400000
-// CHECK-NEXT: crosscheck-with-z3-timeout-threshold = 300
+// CHECK-NEXT: crosscheck-with-z3-eqclass-timeout-threshold = 0
+// CHECK-NEXT: crosscheck-with-z3-rlimit-threshold = 0
+// CHECK-NEXT: crosscheck-with-z3-timeout-threshold = 15000
// CHECK-NEXT: ctu-dir = ""
// CHECK-NEXT: ctu-import-cpp-threshold = 8
// CHECK-NEXT: ctu-import-threshold = 24
diff --git a/clang/unittests/StaticAnalyzer/Z3CrosscheckOracleTest.cpp b/clang/unittests/StaticAnalyzer/Z3CrosscheckOracleTest.cpp
index ef07e47ee911b2..626f5c163d17d0 100644
--- a/clang/unittests/StaticAnalyzer/Z3CrosscheckOracleTest.cpp
+++ b/clang/unittests/StaticAnalyzer/Z3CrosscheckOracleTest.cpp
@@ -38,8 +38,8 @@ static const AnalyzerOptions DefaultOpts = [] {
// Remember to update the tests in this file when these values change.
// Also update the doc comment of `interpretQueryResult`.
- assert(Config.Z3CrosscheckRLimitThreshold == 400'000);
- assert(Config.Z3CrosscheckTimeoutThreshold == 300_ms);
+ assert(Config.Z3CrosscheckRLimitThreshold == 0);
+ assert(Config.Z3CrosscheckTimeoutThreshold == 15'000_ms);
// Usually, when the timeout/rlimit threshold is reached, Z3 only slightly
// overshoots until it realizes that it overshoot and needs to back off.
// Consequently, the measured timeout should be fairly close to the threshold.
@@ -47,8 +47,17 @@ static const AnalyzerOptions DefaultOpts = [] {
return Config;
}();
+static const AnalyzerOptions LimitedOpts = [] {
+ AnalyzerOptions Config = DefaultOpts;
+ Config.Z3CrosscheckEQClassTimeoutThreshold = 700_ms;
+ Config.Z3CrosscheckTimeoutThreshold = 300_step;
+ Config.Z3CrosscheckRLimitThreshold = 400'000_step;
+ return Config;
+}();
+
namespace {
+template <const AnalyzerOptions &Opts>
class Z3CrosscheckOracleTest : public testing::Test {
public:
Z3Decision interpretQueryResult(const Z3Result &Result) {
@@ -56,58 +65,100 @@ class Z3CrosscheckOracleTest : public testing::Test {
}
private:
- Z3CrosscheckOracle Oracle = Z3CrosscheckOracle(DefaultOpts);
+ Z3CrosscheckOracle Oracle = Z3CrosscheckOracle(Opts);
};
-TEST_F(Z3CrosscheckOracleTest, AcceptsFirstSAT) {
+using DefaultZ3CrosscheckOracleTest = Z3CrosscheckOracleTest<DefaultOpts>;
+using LimitedZ3CrosscheckOracleTest = Z3CrosscheckOracleTest<LimitedOpts>;
+
+TEST_F(DefaultZ3CrosscheckOracleTest, AcceptsFirstSAT) {
+ ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 25_ms, 1000_step}));
+}
+TEST_F(LimitedZ3CrosscheckOracleTest, AcceptsFirstSAT) {
ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 25_ms, 1000_step}));
}
-TEST_F(Z3CrosscheckOracleTest, AcceptsSAT) {
+TEST_F(DefaultZ3CrosscheckOracleTest, AcceptsSAT) {
+ ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
+ ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 25_ms, 1000_step}));
+}
+TEST_F(LimitedZ3CrosscheckOracleTest, AcceptsSAT) {
ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 25_ms, 1000_step}));
}
-TEST_F(Z3CrosscheckOracleTest, SATWhenItGoesOverTime) {
+TEST_F(DefaultZ3CrosscheckOracleTest, SATWhenItGoesOverTime) {
+ // Even if it times out, if it is SAT, we should accept it.
+ ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 15'010_ms, 1000_step}));
+}
+TEST_F(LimitedZ3CrosscheckOracleTest, SATWhenItGoesOverTime) {
// Even if it times out, if it is SAT, we should accept it.
ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 310_ms, 1000_step}));
}
-TEST_F(Z3CrosscheckOracleTest, UNSATWhenItGoesOverTime) {
+TEST_F(DefaultZ3CrosscheckOracleTest, UNSATWhenItGoesOverTime) {
+ ASSERT_EQ(RejectEQClass, interpretQueryResult({UNSAT, 15'010_ms, 1000_step}));
+}
+TEST_F(LimitedZ3CrosscheckOracleTest, UNSATWhenItGoesOverTime) {
ASSERT_EQ(RejectEQClass, interpretQueryResult({UNSAT, 310_ms, 1000_step}));
}
-TEST_F(Z3CrosscheckOracleTest, RejectsTimeout) {
+TEST_F(DefaultZ3CrosscheckOracleTest, RejectsTimeout) {
+ ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
+ ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
+ ASSERT_EQ(RejectEQClass, interpretQueryResult({UNDEF, 15'010_ms, 1000_step}));
+}
+TEST_F(LimitedZ3CrosscheckOracleTest, RejectsTimeout) {
ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
ASSERT_EQ(RejectEQClass, interpretQueryResult({UNDEF, 310_ms, 1000_step}));
}
-TEST_F(Z3CrosscheckOracleTest, RejectsUNSATs) {
+TEST_F(DefaultZ3CrosscheckOracleTest, RejectsUNSATs) {
+ ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
+ ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
+ ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
+ ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
+}
+TEST_F(LimitedZ3CrosscheckOracleTest, RejectsUNSATs) {
ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
}
-// Testing cut heuristics:
-// =======================
+// Testing cut heuristics of the two configurations:
+// =================================================
-TEST_F(Z3CrosscheckOracleTest, RejectEQClassIfSpendsTooMuchTotalTime) {
+TEST_F(DefaultZ3CrosscheckOracleTest, RejectEQClassIfSpendsTooMuchTotalTime) {
+ // Simulate long queries, that barely doesn't trigger the timeout.
+ ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 14'990_ms, 1000_step}));
+ ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 14'990_ms, 1000_step}));
+ ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 14'990_ms, 1000_step}));
+}
+TEST_F(LimitedZ3CrosscheckOracleTest, RejectEQClassIfSpendsTooMuchTotalTime) {
// Simulate long queries, that barely doesn't trigger the timeout.
ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
ASSERT_EQ(RejectEQClass, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
}
-TEST_F(Z3CrosscheckOracleTest, SATWhenItSpendsTooMuchTotalTime) {
+TEST_F(DefaultZ3CrosscheckOracleTest, SATWhenItSpendsTooMuchTotalTime) {
+ // Simulate long queries, that barely doesn't trigger the timeout.
+ ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 14'990_ms, 1000_step}));
+ ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 14'990_ms, 1000_step}));
+ ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 14'990_ms, 1000_step}));
+}
+TEST_F(LimitedZ3CrosscheckOracleTest, SATWhenItSpendsTooMuchTotalTime) {
// Simulate long queries, that barely doesn't trigger the timeout.
ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 290_ms, 1000_step}));
}
-TEST_F(Z3CrosscheckOracleTest, RejectEQClassIfAttemptsManySmallQueries) {
+// Z3CrosscheckEQClassTimeoutThreshold is disabled in default configuration, so
+// it doesn't make sense to test that.
+TEST_F(LimitedZ3CrosscheckOracleTest, RejectEQClassIfAttemptsManySmallQueries) {
// Simulate quick, but many queries: 35 quick UNSAT queries.
// 35*20ms = 700ms, which is equal to the 700ms threshold.
for (int i = 0; i < 35; ++i) {
@@ -117,7 +168,9 @@ TEST_F(Z3CrosscheckOracleTest, RejectEQClassIfAttemptsManySmallQueries) {
ASSERT_EQ(RejectEQClass, interpretQueryResult({UNSAT, 1_ms, 1000_step}));
}
-TEST_F(Z3CrosscheckOracleTest, SATWhenIfAttemptsManySmallQueries) {
+// Z3CrosscheckEQClassTimeoutThreshold is disabled in default configuration, so
+// it doesn't make sense to test that.
+TEST_F(LimitedZ3CrosscheckOracleTest, SATWhenItAttemptsManySmallQueries) {
// Simulate quick, but many queries: 35 quick UNSAT queries.
// 35*20ms = 700ms, which is equal to the 700ms threshold.
for (int i = 0; i < 35; ++i) {
@@ -128,16 +181,34 @@ TEST_F(Z3CrosscheckOracleTest, SATWhenIfAttemptsManySmallQueries) {
ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 200_ms, 1000_step}));
}
-TEST_F(Z3CrosscheckOracleTest, RejectEQClassIfExhaustsRLimit) {
+// Z3CrosscheckRLimitThreshold is disabled in default configuration, so it
+// doesn't make sense to test that.
+TEST_F(LimitedZ3CrosscheckOracleTest, RejectEQClassIfExhaustsRLimit) {
ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
ASSERT_EQ(RejectEQClass, interpretQueryResult({UNDEF, 25_ms, 405'000_step}));
}
-TEST_F(Z3CrosscheckOracleTest, SATWhenItExhaustsRLimit) {
+// Z3CrosscheckRLimitThreshold is disabled in default configuration, so it
+// doesn't make sense to test that.
+TEST_F(LimitedZ3CrosscheckOracleTest, SATWhenItExhaustsRLimit) {
ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 25_ms, 405'000_step}));
}
+// Demonstrate the weaknesses of the default configuration:
+// ========================================================
+
+TEST_F(DefaultZ3CrosscheckOracleTest, ManySlowQueriesHangTheAnalyzer) {
+ // Simulate many slow queries: 250 slow UNSAT queries.
+ // 250*14000ms = 3500s, ~1 hour. Since we disabled the total time limitation,
+ // this eqclass would take roughly 1 hour to process.
+ // It doesn't matter what rlimit the queries consume.
+ for (int i = 0; i < 250; ++i) {
+ ASSERT_EQ(RejectReport,
+ interpretQueryResult({UNSAT, 14'000_ms, 1'000'000_step}));
+ }
+}
+
} // namespace
More information about the cfe-commits
mailing list