[clang] [analyzer][Z3] Restore the original timeout of 15s (PR #118291)

Balazs Benics via cfe-commits cfe-commits at lists.llvm.org
Thu Dec 5 05:13:38 PST 2024


https://github.com/steakhal updated https://github.com/llvm/llvm-project/pull/118291

>From 1fb92742a066444d4a074655704c8148ce1f8326 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krist=C3=B3f=20Umann?= <dkszelethus at gmail.com>
Date: Mon, 2 Dec 2024 11:21:05 +0100
Subject: [PATCH 1/5] [analyzer][Z3] Restore the original timeout of 15s

Discussion here:
https://discourse.llvm.org/t/analyzer-rfc-taming-z3-query-times/79520/15?u=szelethus

The original patch, #97298, introduced new timeouts backed by thorough
testing and measurements to keep the running time of Z3 within
reasonable limits. The measurements also showed that only certain
reports and certain TUs were responsible for the poor performance of Z3
refutation.

Unfortunately, it seems that on machines with different
characteristics (slower machines) the current timeouts don't just axe
0.01% of reports, but many more as well. Considering that timeouts are
inherently nondeterministic as a cutoff point, this led to report sets
being vastly different on the same projects with the same configuration.
The discussion link shows that all configurations introduced in the
patch with their default values lead to severe nondeterminism of the
analyzer. As we, and others, use the analyzer as a gating tool for PRs,
we should revert to the original defaults.

We should respect that
* There are still parts of the analyzer that are either proven or
  suspected to contain nondeterministic code (like pointer sets),
* A 15s timeout is more likely to hit the same reports every time on a
  wider range of machines; it is still inherently nondeterministic, but
  an infinite timeout leads to the tool hanging,
* If you measure the performance of the analyzer on your machines, you
  can and should achieve some speedup with little or no observable
  nondeterminism.
---
 .../clang/StaticAnalyzer/Core/AnalyzerOptions.def | 15 +++++++++------
 clang/test/Analysis/analyzer-config.c             |  6 +++---
 .../StaticAnalyzer/Z3CrosscheckOracleTest.cpp     | 14 +++++++-------
 3 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
index 737bc8e86cfb6a..64fb11821a2656 100644
--- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
+++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
@@ -189,20 +189,23 @@ ANALYZER_OPTION(
     "crosscheck-with-z3-eqclass-timeout-threshold",
     "Set a timeout for bug report equivalence classes in milliseconds. "
     "If we exhaust this threshold, we will drop the bug report eqclass "
-    "instead of doing more Z3 queries. Set 0 for no timeout.", 700)
+    "instead of doing more Z3 queries. On fast machines, 700 is a sane value. "
+    "Set 0 for no timeout.", 0)
 
 ANALYZER_OPTION(
     unsigned, Z3CrosscheckTimeoutThreshold,
     "crosscheck-with-z3-timeout-threshold",
-    "Set a timeout for individual Z3 queries in milliseconds. "
-    "Set 0 for no timeout.", 300)
+    "Set a timeout for individual Z3 queries in milliseconds. On fast "
+    "machines, 400 is a sane value. "
+    "Set 0 for no timeout.", 15'000)
 
 ANALYZER_OPTION(
     unsigned, Z3CrosscheckRLimitThreshold,
     "crosscheck-with-z3-rlimit-threshold",
-    "Set the Z3 resource limit threshold. This sets a deterministic cutoff "
-    "point for Z3 queries, as longer queries usually consume more resources. "
-    "Set 0 for unlimited.", 400'000)
+    "Set the Z3 resource limit threshold. This sets a supposedly deterministic "
+    "cutoff point for Z3 queries, as longer queries usually consume more "
+    "resources. "
+    "Set 0 for unlimited.", 0)
 
 ANALYZER_OPTION(bool, ShouldReportIssuesInMainSourceFile,
                 "report-in-main-source-file",
diff --git a/clang/test/Analysis/analyzer-config.c b/clang/test/Analysis/analyzer-config.c
index 8eb869bac46f8f..0f1314aae9db57 100644
--- a/clang/test/Analysis/analyzer-config.c
+++ b/clang/test/Analysis/analyzer-config.c
@@ -41,9 +41,9 @@
 // CHECK-NEXT: cplusplus.Move:WarnOn = KnownsAndLocals
 // CHECK-NEXT: cplusplus.SmartPtrModeling:ModelSmartPtrDereference = false
 // CHECK-NEXT: crosscheck-with-z3 = false
-// CHECK-NEXT: crosscheck-with-z3-eqclass-timeout-threshold = 700
-// CHECK-NEXT: crosscheck-with-z3-rlimit-threshold = 400000
-// CHECK-NEXT: crosscheck-with-z3-timeout-threshold = 300
+// CHECK-NEXT: crosscheck-with-z3-eqclass-timeout-threshold = 0
+// CHECK-NEXT: crosscheck-with-z3-rlimit-threshold = 0
+// CHECK-NEXT: crosscheck-with-z3-timeout-threshold = 15000
 // CHECK-NEXT: ctu-dir = ""
 // CHECK-NEXT: ctu-import-cpp-threshold = 8
 // CHECK-NEXT: ctu-import-threshold = 24
diff --git a/clang/unittests/StaticAnalyzer/Z3CrosscheckOracleTest.cpp b/clang/unittests/StaticAnalyzer/Z3CrosscheckOracleTest.cpp
index ef07e47ee911b2..a8cb2782c7b72f 100644
--- a/clang/unittests/StaticAnalyzer/Z3CrosscheckOracleTest.cpp
+++ b/clang/unittests/StaticAnalyzer/Z3CrosscheckOracleTest.cpp
@@ -38,8 +38,8 @@ static const AnalyzerOptions DefaultOpts = [] {
 
   // Remember to update the tests in this file when these values change.
   // Also update the doc comment of `interpretQueryResult`.
-  assert(Config.Z3CrosscheckRLimitThreshold == 400'000);
-  assert(Config.Z3CrosscheckTimeoutThreshold == 300_ms);
+  assert(Config.Z3CrosscheckRLimitThreshold == 0);
+  assert(Config.Z3CrosscheckTimeoutThreshold == 15'000_ms);
   // Usually, when the timeout/rlimit threshold is reached, Z3 only slightly
   // overshoots until it realizes that it overshoot and needs to back off.
   // Consequently, the measured timeout should be fairly close to the threshold.
@@ -74,13 +74,13 @@ TEST_F(Z3CrosscheckOracleTest, SATWhenItGoesOverTime) {
 }
 
 TEST_F(Z3CrosscheckOracleTest, UNSATWhenItGoesOverTime) {
-  ASSERT_EQ(RejectEQClass, interpretQueryResult({UNSAT, 310_ms, 1000_step}));
+  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 310_ms, 1000_step}));
 }
 
 TEST_F(Z3CrosscheckOracleTest, RejectsTimeout) {
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
-  ASSERT_EQ(RejectEQClass, interpretQueryResult({UNDEF, 310_ms, 1000_step}));
+  ASSERT_EQ(RejectReport, interpretQueryResult({UNDEF, 310_ms, 1000_step}));
 }
 
 TEST_F(Z3CrosscheckOracleTest, RejectsUNSATs) {
@@ -97,7 +97,7 @@ TEST_F(Z3CrosscheckOracleTest, RejectEQClassIfSpendsTooMuchTotalTime) {
   // Simulate long queries, that barely doesn't trigger the timeout.
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
-  ASSERT_EQ(RejectEQClass, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
+  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
 }
 
 TEST_F(Z3CrosscheckOracleTest, SATWhenItSpendsTooMuchTotalTime) {
@@ -114,7 +114,7 @@ TEST_F(Z3CrosscheckOracleTest, RejectEQClassIfAttemptsManySmallQueries) {
     ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 20_ms, 1000_step}));
   }
   // Do one more to trigger the heuristic.
-  ASSERT_EQ(RejectEQClass, interpretQueryResult({UNSAT, 1_ms, 1000_step}));
+  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 1_ms, 1000_step}));
 }
 
 TEST_F(Z3CrosscheckOracleTest, SATWhenIfAttemptsManySmallQueries) {
@@ -131,7 +131,7 @@ TEST_F(Z3CrosscheckOracleTest, SATWhenIfAttemptsManySmallQueries) {
 TEST_F(Z3CrosscheckOracleTest, RejectEQClassIfExhaustsRLimit) {
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
-  ASSERT_EQ(RejectEQClass, interpretQueryResult({UNDEF, 25_ms, 405'000_step}));
+  ASSERT_EQ(RejectReport, interpretQueryResult({UNDEF, 25_ms, 405'000_step}));
 }
 
 TEST_F(Z3CrosscheckOracleTest, SATWhenItExhaustsRLimit) {

>From d308764a18b11635f46618376e92be0196529250 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Thu, 5 Dec 2024 13:00:55 +0100
Subject: [PATCH 2/5] NFC Reword some crosscheck config descriptions

---
 .../clang/StaticAnalyzer/Core/AnalyzerOptions.def  | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
index 64fb11821a2656..efed65e6c986da 100644
--- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
+++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
@@ -189,14 +189,19 @@ ANALYZER_OPTION(
     "crosscheck-with-z3-eqclass-timeout-threshold",
     "Set a timeout for bug report equivalence classes in milliseconds. "
     "If we exhaust this threshold, we will drop the bug report eqclass "
-    "instead of doing more Z3 queries. On fast machines, 700 is a sane value. "
+    "instead of doing more Z3 queries. Setting this to 700 ms in conjunction "
+    "with \"crosscheck-with-z3-timeout-threshold\" of 300 ms, would nicely "
+    "guarantee that no bug report equivalence class can take longer than "
+    "1 second, effectively mitigating Z3 hangs during refutation. "
     "Set 0 for no timeout.", 0)
 
 ANALYZER_OPTION(
     unsigned, Z3CrosscheckTimeoutThreshold,
     "crosscheck-with-z3-timeout-threshold",
-    "Set a timeout for individual Z3 queries in milliseconds. On fast "
-    "machines, 400 is a sane value. "
+    "Set a timeout for individual Z3 queries in milliseconds. "
+    "On fast machines, 300 worked well in some cases. "
+    "The lower it is, the higher the chances of having flaky issues. "
+    "Having no timeout may hang the analyzer indefinitely. "
     "Set 0 for no timeout.", 15'000)
 
 ANALYZER_OPTION(
@@ -205,7 +210,8 @@ ANALYZER_OPTION(
     "Set the Z3 resource limit threshold. This sets a supposedly deterministic "
     "cutoff point for Z3 queries, as longer queries usually consume more "
     "resources. "
-    "Set 0 for unlimited.", 0)
+    "400'000 should on average make Z3 queries run for up to 100ms on modern "
+    "hardware. Set 0 for unlimited.", 0)
 
 ANALYZER_OPTION(bool, ShouldReportIssuesInMainSourceFile,
                 "report-in-main-source-file",

>From 40dbfeec2cff0c56c667023108d7461e2d91c204 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Thu, 5 Dec 2024 13:45:20 +0100
Subject: [PATCH 3/5] Duplicate tests for testing the customized Oracle

We will have two fixtures:
 - DefaultZ3CrosscheckOracleTest (default options)
 - LimitedZ3CrosscheckOracleTest
   (eqclass timeout 700ms, 300ms and 400k rlimit per query, aka. the previous default)

Each test is duplicated to showcase the different behavior in the two
configs.
---
 .../StaticAnalyzer/Z3CrosscheckOracleTest.cpp | 108 +++++++++++++++---
 1 file changed, 93 insertions(+), 15 deletions(-)

diff --git a/clang/unittests/StaticAnalyzer/Z3CrosscheckOracleTest.cpp b/clang/unittests/StaticAnalyzer/Z3CrosscheckOracleTest.cpp
index a8cb2782c7b72f..3aca7a58e8480d 100644
--- a/clang/unittests/StaticAnalyzer/Z3CrosscheckOracleTest.cpp
+++ b/clang/unittests/StaticAnalyzer/Z3CrosscheckOracleTest.cpp
@@ -47,8 +47,17 @@ static const AnalyzerOptions DefaultOpts = [] {
   return Config;
 }();
 
+static const AnalyzerOptions LimitedOpts = [] {
+  AnalyzerOptions Config = DefaultOpts;
+  Config.Z3CrosscheckEQClassTimeoutThreshold = 700_ms;
+  Config.Z3CrosscheckTimeoutThreshold = 300_step;
+  Config.Z3CrosscheckRLimitThreshold = 400'000_step;
+  return Config;
+}();
+
 namespace {
 
+template <const AnalyzerOptions &Opts>
 class Z3CrosscheckOracleTest : public testing::Test {
 public:
   Z3Decision interpretQueryResult(const Z3Result &Result) {
@@ -56,58 +65,98 @@ class Z3CrosscheckOracleTest : public testing::Test {
   }
 
 private:
-  Z3CrosscheckOracle Oracle = Z3CrosscheckOracle(DefaultOpts);
+  Z3CrosscheckOracle Oracle = Z3CrosscheckOracle(Opts);
 };
 
-TEST_F(Z3CrosscheckOracleTest, AcceptsFirstSAT) {
+using DefaultZ3CrosscheckOracleTest = Z3CrosscheckOracleTest<DefaultOpts>;
+using LimitedZ3CrosscheckOracleTest = Z3CrosscheckOracleTest<LimitedOpts>;
+
+TEST_F(DefaultZ3CrosscheckOracleTest, AcceptsFirstSAT) {
+  ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 25_ms, 1000_step}));
+}
+TEST_F(LimitedZ3CrosscheckOracleTest, AcceptsFirstSAT) {
   ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 25_ms, 1000_step}));
 }
 
-TEST_F(Z3CrosscheckOracleTest, AcceptsSAT) {
+TEST_F(DefaultZ3CrosscheckOracleTest, AcceptsSAT) {
+  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
+  ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 25_ms, 1000_step}));
+}
+TEST_F(LimitedZ3CrosscheckOracleTest, AcceptsSAT) {
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
   ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 25_ms, 1000_step}));
 }
 
-TEST_F(Z3CrosscheckOracleTest, SATWhenItGoesOverTime) {
+TEST_F(DefaultZ3CrosscheckOracleTest, SATWhenItGoesOverTime) {
+  // Even if it times out, if it is SAT, we should accept it.
+  ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 310_ms, 1000_step}));
+}
+TEST_F(LimitedZ3CrosscheckOracleTest, SATWhenItGoesOverTime) {
   // Even if it times out, if it is SAT, we should accept it.
   ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 310_ms, 1000_step}));
 }
 
-TEST_F(Z3CrosscheckOracleTest, UNSATWhenItGoesOverTime) {
+TEST_F(DefaultZ3CrosscheckOracleTest, UNSATWhenItGoesOverTime) {
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 310_ms, 1000_step}));
 }
+TEST_F(LimitedZ3CrosscheckOracleTest, UNSATWhenItGoesOverTime) {
+  ASSERT_EQ(RejectEQClass, interpretQueryResult({UNSAT, 310_ms, 1000_step}));
+}
 
-TEST_F(Z3CrosscheckOracleTest, RejectsTimeout) {
+TEST_F(DefaultZ3CrosscheckOracleTest, RejectsTimeout) {
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
   ASSERT_EQ(RejectReport, interpretQueryResult({UNDEF, 310_ms, 1000_step}));
 }
+TEST_F(LimitedZ3CrosscheckOracleTest, RejectsTimeout) {
+  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
+  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
+  ASSERT_EQ(RejectEQClass, interpretQueryResult({UNDEF, 310_ms, 1000_step}));
+}
 
-TEST_F(Z3CrosscheckOracleTest, RejectsUNSATs) {
+TEST_F(DefaultZ3CrosscheckOracleTest, RejectsUNSATs) {
+  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
+  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
+  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
+  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
+}
+TEST_F(LimitedZ3CrosscheckOracleTest, RejectsUNSATs) {
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
 }
 
-// Testing cut heuristics:
-// =======================
+// Testing cut heuristics of the two configurations:
+// =================================================
 
-TEST_F(Z3CrosscheckOracleTest, RejectEQClassIfSpendsTooMuchTotalTime) {
+TEST_F(DefaultZ3CrosscheckOracleTest, RejectEQClassIfSpendsTooMuchTotalTime) {
   // Simulate long queries, that barely doesn't trigger the timeout.
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
 }
+TEST_F(LimitedZ3CrosscheckOracleTest, RejectEQClassIfSpendsTooMuchTotalTime) {
+  // Simulate long queries, that barely doesn't trigger the timeout.
+  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
+  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
+  ASSERT_EQ(RejectEQClass, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
+}
 
-TEST_F(Z3CrosscheckOracleTest, SATWhenItSpendsTooMuchTotalTime) {
+TEST_F(DefaultZ3CrosscheckOracleTest, SATWhenItSpendsTooMuchTotalTime) {
+  // Simulate long queries, that barely doesn't trigger the timeout.
+  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
+  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
+  ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 290_ms, 1000_step}));
+}
+TEST_F(LimitedZ3CrosscheckOracleTest, SATWhenItSpendsTooMuchTotalTime) {
   // Simulate long queries, that barely doesn't trigger the timeout.
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
   ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 290_ms, 1000_step}));
 }
 
-TEST_F(Z3CrosscheckOracleTest, RejectEQClassIfAttemptsManySmallQueries) {
+TEST_F(DefaultZ3CrosscheckOracleTest, RejectEQClassIfAttemptsManySmallQueries) {
   // Simulate quick, but many queries: 35 quick UNSAT queries.
   // 35*20ms = 700ms, which is equal to the 700ms threshold.
   for (int i = 0; i < 35; ++i) {
@@ -116,8 +165,27 @@ TEST_F(Z3CrosscheckOracleTest, RejectEQClassIfAttemptsManySmallQueries) {
   // Do one more to trigger the heuristic.
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 1_ms, 1000_step}));
 }
+TEST_F(LimitedZ3CrosscheckOracleTest, RejectEQClassIfAttemptsManySmallQueries) {
+  // Simulate quick, but many queries: 35 quick UNSAT queries.
+  // 35*20ms = 700ms, which is equal to the 700ms threshold.
+  for (int i = 0; i < 35; ++i) {
+    ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 20_ms, 1000_step}));
+  }
+  // Do one more to trigger the heuristic.
+  ASSERT_EQ(RejectEQClass, interpretQueryResult({UNSAT, 1_ms, 1000_step}));
+}
 
-TEST_F(Z3CrosscheckOracleTest, SATWhenIfAttemptsManySmallQueries) {
+TEST_F(DefaultZ3CrosscheckOracleTest, SATWhenIfAttemptsManySmallQueries) {
+  // Simulate quick, but many queries: 35 quick UNSAT queries.
+  // 35*20ms = 700ms, which is equal to the 700ms threshold.
+  for (int i = 0; i < 35; ++i) {
+    ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 20_ms, 1000_step}));
+  }
+  // Do one more to trigger the heuristic, but given this was SAT, we still
+  // accept the query.
+  ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 200_ms, 1000_step}));
+}
+TEST_F(LimitedZ3CrosscheckOracleTest, SATWhenIfAttemptsManySmallQueries) {
   // Simulate quick, but many queries: 35 quick UNSAT queries.
   // 35*20ms = 700ms, which is equal to the 700ms threshold.
   for (int i = 0; i < 35; ++i) {
@@ -128,13 +196,23 @@ TEST_F(Z3CrosscheckOracleTest, SATWhenIfAttemptsManySmallQueries) {
   ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 200_ms, 1000_step}));
 }
 
-TEST_F(Z3CrosscheckOracleTest, RejectEQClassIfExhaustsRLimit) {
+TEST_F(DefaultZ3CrosscheckOracleTest, RejectEQClassIfExhaustsRLimit) {
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
   ASSERT_EQ(RejectReport, interpretQueryResult({UNDEF, 25_ms, 405'000_step}));
 }
+TEST_F(LimitedZ3CrosscheckOracleTest, RejectEQClassIfExhaustsRLimit) {
+  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
+  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
+  ASSERT_EQ(RejectEQClass, interpretQueryResult({UNDEF, 25_ms, 405'000_step}));
+}
 
-TEST_F(Z3CrosscheckOracleTest, SATWhenItExhaustsRLimit) {
+TEST_F(DefaultZ3CrosscheckOracleTest, SATWhenItExhaustsRLimit) {
+  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
+  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
+  ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 25_ms, 405'000_step}));
+}
+TEST_F(LimitedZ3CrosscheckOracleTest, SATWhenItExhaustsRLimit) {
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
   ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 25_ms, 405'000_step}));

>From 453aafcea09ddcc89172e884242d52e57ab8f405 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Thu, 5 Dec 2024 14:07:33 +0100
Subject: [PATCH 4/5] Revisit the tests and ensure they have the right
 thresholds to test what they intended to test

Some tests were dropped because they only make sense to test if the
given configuration is not disabled.
---
 .../StaticAnalyzer/Z3CrosscheckOracleTest.cpp | 57 ++++++-------------
 1 file changed, 18 insertions(+), 39 deletions(-)

diff --git a/clang/unittests/StaticAnalyzer/Z3CrosscheckOracleTest.cpp b/clang/unittests/StaticAnalyzer/Z3CrosscheckOracleTest.cpp
index 3aca7a58e8480d..ba99bb3dc8ba8f 100644
--- a/clang/unittests/StaticAnalyzer/Z3CrosscheckOracleTest.cpp
+++ b/clang/unittests/StaticAnalyzer/Z3CrosscheckOracleTest.cpp
@@ -89,7 +89,7 @@ TEST_F(LimitedZ3CrosscheckOracleTest, AcceptsSAT) {
 
 TEST_F(DefaultZ3CrosscheckOracleTest, SATWhenItGoesOverTime) {
   // Even if it times out, if it is SAT, we should accept it.
-  ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 310_ms, 1000_step}));
+  ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 15'010_ms, 1000_step}));
 }
 TEST_F(LimitedZ3CrosscheckOracleTest, SATWhenItGoesOverTime) {
   // Even if it times out, if it is SAT, we should accept it.
@@ -97,7 +97,7 @@ TEST_F(LimitedZ3CrosscheckOracleTest, SATWhenItGoesOverTime) {
 }
 
 TEST_F(DefaultZ3CrosscheckOracleTest, UNSATWhenItGoesOverTime) {
-  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 310_ms, 1000_step}));
+  ASSERT_EQ(RejectEQClass, interpretQueryResult({UNSAT, 15'010_ms, 1000_step}));
 }
 TEST_F(LimitedZ3CrosscheckOracleTest, UNSATWhenItGoesOverTime) {
   ASSERT_EQ(RejectEQClass, interpretQueryResult({UNSAT, 310_ms, 1000_step}));
@@ -106,7 +106,7 @@ TEST_F(LimitedZ3CrosscheckOracleTest, UNSATWhenItGoesOverTime) {
 TEST_F(DefaultZ3CrosscheckOracleTest, RejectsTimeout) {
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
-  ASSERT_EQ(RejectReport, interpretQueryResult({UNDEF, 310_ms, 1000_step}));
+  ASSERT_EQ(RejectEQClass, interpretQueryResult({UNDEF, 15'010_ms, 1000_step}));
 }
 TEST_F(LimitedZ3CrosscheckOracleTest, RejectsTimeout) {
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
@@ -132,9 +132,9 @@ TEST_F(LimitedZ3CrosscheckOracleTest, RejectsUNSATs) {
 
 TEST_F(DefaultZ3CrosscheckOracleTest, RejectEQClassIfSpendsTooMuchTotalTime) {
   // Simulate long queries, that barely doesn't trigger the timeout.
-  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
-  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
-  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
+  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 14'990_ms, 1000_step}));
+  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 14'990_ms, 1000_step}));
+  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 14'990_ms, 1000_step}));
 }
 TEST_F(LimitedZ3CrosscheckOracleTest, RejectEQClassIfSpendsTooMuchTotalTime) {
   // Simulate long queries, that barely doesn't trigger the timeout.
@@ -145,9 +145,9 @@ TEST_F(LimitedZ3CrosscheckOracleTest, RejectEQClassIfSpendsTooMuchTotalTime) {
 
 TEST_F(DefaultZ3CrosscheckOracleTest, SATWhenItSpendsTooMuchTotalTime) {
   // Simulate long queries, that barely doesn't trigger the timeout.
-  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
-  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 290_ms, 1000_step}));
-  ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 290_ms, 1000_step}));
+  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 14'990_ms, 1000_step}));
+  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 14'990_ms, 1000_step}));
+  ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 14'990_ms, 1000_step}));
 }
 TEST_F(LimitedZ3CrosscheckOracleTest, SATWhenItSpendsTooMuchTotalTime) {
   // Simulate long queries, that barely doesn't trigger the timeout.
@@ -156,15 +156,8 @@ TEST_F(LimitedZ3CrosscheckOracleTest, SATWhenItSpendsTooMuchTotalTime) {
   ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 290_ms, 1000_step}));
 }
 
-TEST_F(DefaultZ3CrosscheckOracleTest, RejectEQClassIfAttemptsManySmallQueries) {
-  // Simulate quick, but many queries: 35 quick UNSAT queries.
-  // 35*20ms = 700ms, which is equal to the 700ms threshold.
-  for (int i = 0; i < 35; ++i) {
-    ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 20_ms, 1000_step}));
-  }
-  // Do one more to trigger the heuristic.
-  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 1_ms, 1000_step}));
-}
+// Z3CrosscheckEQClassTimeoutThreshold is disabled in default configuration, so
+// it doesn't make sense to test that.
 TEST_F(LimitedZ3CrosscheckOracleTest, RejectEQClassIfAttemptsManySmallQueries) {
   // Simulate quick, but many queries: 35 quick UNSAT queries.
   // 35*20ms = 700ms, which is equal to the 700ms threshold.
@@ -175,17 +168,9 @@ TEST_F(LimitedZ3CrosscheckOracleTest, RejectEQClassIfAttemptsManySmallQueries) {
   ASSERT_EQ(RejectEQClass, interpretQueryResult({UNSAT, 1_ms, 1000_step}));
 }
 
-TEST_F(DefaultZ3CrosscheckOracleTest, SATWhenIfAttemptsManySmallQueries) {
-  // Simulate quick, but many queries: 35 quick UNSAT queries.
-  // 35*20ms = 700ms, which is equal to the 700ms threshold.
-  for (int i = 0; i < 35; ++i) {
-    ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 20_ms, 1000_step}));
-  }
-  // Do one more to trigger the heuristic, but given this was SAT, we still
-  // accept the query.
-  ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 200_ms, 1000_step}));
-}
-TEST_F(LimitedZ3CrosscheckOracleTest, SATWhenIfAttemptsManySmallQueries) {
+// Z3CrosscheckEQClassTimeoutThreshold is disabled in default configuration, so
+// it doesn't make sense to test that.
+TEST_F(LimitedZ3CrosscheckOracleTest, SATWhenItAttemptsManySmallQueries) {
   // Simulate quick, but many queries: 35 quick UNSAT queries.
   // 35*20ms = 700ms, which is equal to the 700ms threshold.
   for (int i = 0; i < 35; ++i) {
@@ -196,22 +181,16 @@ TEST_F(LimitedZ3CrosscheckOracleTest, SATWhenIfAttemptsManySmallQueries) {
   ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 200_ms, 1000_step}));
 }
 
-TEST_F(DefaultZ3CrosscheckOracleTest, RejectEQClassIfExhaustsRLimit) {
-  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
-  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
-  ASSERT_EQ(RejectReport, interpretQueryResult({UNDEF, 25_ms, 405'000_step}));
-}
+// Z3CrosscheckRLimitThreshold is disabled in default configuration, so it
+// doesn't make sense to test that.
 TEST_F(LimitedZ3CrosscheckOracleTest, RejectEQClassIfExhaustsRLimit) {
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
   ASSERT_EQ(RejectEQClass, interpretQueryResult({UNDEF, 25_ms, 405'000_step}));
 }
 
-TEST_F(DefaultZ3CrosscheckOracleTest, SATWhenItExhaustsRLimit) {
-  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
-  ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
-  ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 25_ms, 405'000_step}));
-}
+// Z3CrosscheckRLimitThreshold is disabled in default configuration, so it
+// doesn't make sense to test that.
 TEST_F(LimitedZ3CrosscheckOracleTest, SATWhenItExhaustsRLimit) {
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));
   ASSERT_EQ(RejectReport, interpretQueryResult({UNSAT, 25_ms, 1000_step}));

>From 4b63bf73a40786b5f1e66dc52f1c0f66b71e99f5 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbalazs at gmail.com>
Date: Thu, 5 Dec 2024 14:11:03 +0100
Subject: [PATCH 5/5] Demonstrate the weakness of the default configuration

---
 .../StaticAnalyzer/Z3CrosscheckOracleTest.cpp      | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/clang/unittests/StaticAnalyzer/Z3CrosscheckOracleTest.cpp b/clang/unittests/StaticAnalyzer/Z3CrosscheckOracleTest.cpp
index ba99bb3dc8ba8f..626f5c163d17d0 100644
--- a/clang/unittests/StaticAnalyzer/Z3CrosscheckOracleTest.cpp
+++ b/clang/unittests/StaticAnalyzer/Z3CrosscheckOracleTest.cpp
@@ -197,4 +197,18 @@ TEST_F(LimitedZ3CrosscheckOracleTest, SATWhenItExhaustsRLimit) {
   ASSERT_EQ(AcceptReport, interpretQueryResult({SAT, 25_ms, 405'000_step}));
 }
 
+// Demonstrate the weaknesses of the default configuration:
+// ========================================================
+
+TEST_F(DefaultZ3CrosscheckOracleTest, ManySlowQueriesHangTheAnalyzer) {
+  // Simulate many slow queries: 250 slow UNSAT queries.
+  // 250*14000ms = 3500s, ~1 hour. Since we disabled the total time limitation,
+  // this eqclass would take roughly 1 hour to process.
+  // It doesn't matter what rlimit the queries consume.
+  for (int i = 0; i < 250; ++i) {
+    ASSERT_EQ(RejectReport,
+              interpretQueryResult({UNSAT, 14'000_ms, 1'000'000_step}));
+  }
+}
+
 } // namespace



More information about the cfe-commits mailing list