[clang] 7d935d0 - [dataflow] improve determinism of generated SAT system
Sam McCall via cfe-commits
cfe-commits at lists.llvm.org
Tue Jul 11 23:09:16 PDT 2023
Author: Sam McCall
Date: 2023-07-12T08:09:10+02:00
New Revision: 7d935d083659c9ddaf3389c1747fc8c59f07e3fc
URL: https://github.com/llvm/llvm-project/commit/7d935d083659c9ddaf3389c1747fc8c59f07e3fc
DIFF: https://github.com/llvm/llvm-project/commit/7d935d083659c9ddaf3389c1747fc8c59f07e3fc.diff
LOG: [dataflow] improve determinism of generated SAT system
Fixes two places where we relied on map iteration order when processing
values, which leaked nondeterminism into the generated SAT formulas.
Adds a couple of tests that directly assert that the SAT system is
equivalent on each run.
It's desirable that the formulas are deterministic based on the input:
- our SAT solver is naive and perfermance is sensitive to even simple
semantics-preserving transformations like A|B to B|A.
(e.g. it's likely to choose a different variable to split on).
Timeout failures are bad, but *flaky* ones are terrible to debug.
- similarly when debugging, it's important to have a consistent
understanding of what e.g. "V23" means across runs.
---
Both changes in this patch were isolated from a nullability analysis of
real-world code which was extremely slow, spending ages in the SAT
solver at "random" points that varied on each run.
I've included a reduced version of the code as a regression test.
One of the changes shows up directly as flow-condition nondeterminism
with a no-op analysis, the other relied on bits of the nullability
analysis but I found a synthetic example to show the problem.
Differential Revision: https://reviews.llvm.org/D154948
Added:
clang/unittests/Analysis/FlowSensitive/DeterminismTest.cpp
Modified:
clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
clang/unittests/Analysis/FlowSensitive/CMakeLists.txt
Removed:
################################################################################
diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
index 512f66073da457..6fd4a085fa7a5f 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -28,6 +28,7 @@
#include "clang/Analysis/FlowSensitive/Value.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include <memory>
@@ -634,8 +635,9 @@ class Environment {
// block.
llvm::DenseMap<const ValueDecl *, StorageLocation *> DeclToLoc;
llvm::DenseMap<const Expr *, StorageLocation *> ExprToLoc;
-
- llvm::DenseMap<const StorageLocation *, Value *> LocToVal;
+ // We preserve insertion order so that join/widen process values in
+ // deterministic sequence. This in turn produces deterministic SAT formulas.
+ llvm::MapVector<const StorageLocation *, Value *> LocToVal;
// Maps locations of struct members to symbolic values of the structs that own
// them and the decls of the struct members.
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index 9b1fca41e911cc..3cf3c721763085 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -20,6 +20,7 @@
#include "clang/Analysis/FlowSensitive/Value.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
@@ -505,7 +506,7 @@ LatticeJoinEffect Environment::widen(const Environment &PrevEnv,
assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size());
// assert(MemberLocToStruct.size() <= PrevEnv.MemberLocToStruct.size());
- llvm::DenseMap<const StorageLocation *, Value *> WidenedLocToVal;
+ llvm::MapVector<const StorageLocation *, Value *> WidenedLocToVal;
for (auto &Entry : LocToVal) {
const StorageLocation *Loc = Entry.first;
assert(Loc != nullptr);
diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
index 9e48df1b994dc6..183cb32e5e7687 100644
--- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
+++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
@@ -275,8 +275,7 @@ class JoinedStateBuilder {
/// `std::nullopt` represent basic blocks that are not evaluated yet.
static TypeErasedDataflowAnalysisState
computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) {
- llvm::DenseSet<const CFGBlock *> Preds;
- Preds.insert(Block.pred_begin(), Block.pred_end());
+ std::vector<const CFGBlock *> Preds(Block.pred_begin(), Block.pred_end());
if (Block.getTerminator().isTemporaryDtorsBranch()) {
// This handles a special case where the code that produced the CFG includes
// a conditional operator with a branch that constructs a temporary and
@@ -305,7 +304,7 @@ computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) {
auto &StmtToBlock = AC.CFCtx.getStmtToBlock();
auto StmtBlock = StmtToBlock.find(Block.getTerminatorStmt());
assert(StmtBlock != StmtToBlock.end());
- Preds.erase(StmtBlock->getSecond());
+ llvm::erase_value(Preds, StmtBlock->getSecond());
}
}
diff --git a/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt
index 81f804d10bf7cd..a9c07d930cdd07 100644
--- a/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt
+++ b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt
@@ -10,6 +10,7 @@ add_clang_unittest(ClangAnalysisFlowSensitiveTests
DataflowAnalysisContextTest.cpp
DataflowEnvironmentTest.cpp
DebugSupportTest.cpp
+ DeterminismTest.cpp
LoggerTest.cpp
MapLatticeTest.cpp
MatchSwitchTest.cpp
diff --git a/clang/unittests/Analysis/FlowSensitive/DeterminismTest.cpp b/clang/unittests/Analysis/FlowSensitive/DeterminismTest.cpp
new file mode 100644
index 00000000000000..5ba26a08320364
--- /dev/null
+++ b/clang/unittests/Analysis/FlowSensitive/DeterminismTest.cpp
@@ -0,0 +1,137 @@
+//===- unittests/Analysis/FlowSensitive/DeterminismTest.cpp ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TestingSupport.h"
+#include "clang/AST/Decl.h"
+#include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
+#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h"
+#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
+#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
+#include "clang/Analysis/FlowSensitive/Formula.h"
+#include "clang/Analysis/FlowSensitive/NoopAnalysis.h"
+#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
+#include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Testing/TestAST.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
+#include <memory>
+#include <string>
+
+namespace clang::dataflow {
+
+// Run a no-op analysis, and return a textual representation of the
+// flow-condition at function exit.
+std::string analyzeAndPrintExitCondition(llvm::StringRef Code) {
+ DataflowAnalysisContext DACtx(std::make_unique<WatchedLiteralsSolver>());
+ clang::TestAST AST(Code);
+ const auto *Target =
+ cast<FunctionDecl>(test::findValueDecl(AST.context(), "target"));
+ Environment InitEnv(DACtx, *Target);
+ auto CFCtx = cantFail(ControlFlowContext::build(*Target));
+
+ NoopAnalysis Analysis(AST.context(), DataflowAnalysisOptions{});
+
+ auto Result = runDataflowAnalysis(CFCtx, Analysis, InitEnv);
+ EXPECT_FALSE(!Result) << Result.takeError();
+
+ Atom FinalFC = (*Result)[CFCtx.getCFG().getExit().getBlockID()]
+ ->Env.getFlowConditionToken();
+ std::string Textual;
+ llvm::raw_string_ostream OS(Textual);
+ DACtx.dumpFlowCondition(FinalFC, OS);
+ return Textual;
+}
+
+TEST(DeterminismTest, NestedSwitch) {
+ // Example extracted from real-world code that had wildly nondeterministic
+ // analysis times.
+ // Its flow condition depends on the order we join predecessor blocks.
+ const char *Code = R"cpp(
+ struct Tree;
+ struct Rep {
+ Tree *tree();
+ int length;
+ };
+ struct Tree {
+ int height();
+ Rep *edge(int);
+ int length;
+ };
+ struct RetVal {};
+ int getInt();
+ bool maybe();
+
+ RetVal make(int size);
+ inline RetVal target(int size, Tree& self) {
+ Tree* tree = &self;
+ const int height = self.height();
+ Tree* n1 = tree;
+ Tree* n2 = tree;
+ switch (height) {
+ case 3:
+ tree = tree->edge(0)->tree();
+ if (maybe()) return {};
+ n2 = tree;
+ case 2:
+ tree = tree->edge(0)->tree();
+ n1 = tree;
+ if (maybe()) return {};
+ case 1:
+ tree = tree->edge(0)->tree();
+ if (maybe()) return {};
+ case 0:
+ Rep* edge = tree->edge(0);
+ if (maybe()) return {};
+ int avail = getInt();
+ if (avail == 0) return {};
+ int delta = getInt();
+ RetVal span = {};
+ edge->length += delta;
+ switch (height) {
+ case 3:
+ n1->length += delta;
+ case 2:
+ n1->length += delta;
+ case 1:
+ n1->length += delta;
+ case 0:
+ n1->length += delta;
+ return span;
+ }
+ break;
+ }
+ return make(size);
+ }
+ )cpp";
+
+ std::string Cond = analyzeAndPrintExitCondition(Code);
+ for (unsigned I = 0; I < 10; ++I)
+ EXPECT_EQ(Cond, analyzeAndPrintExitCondition(Code));
+}
+
+TEST(DeterminismTest, ValueMergeOrder) {
+ // Artificial example whose final flow condition variable numbering depends
+ // on the order in which we merge a, b, and c.
+ const char *Code = R"cpp(
+ bool target(bool a, bool b, bool c) {
+ if (a)
+ b = c;
+ else
+ c = b;
+ return a && b && c;
+ }
+ )cpp";
+
+ std::string Cond = analyzeAndPrintExitCondition(Code);
+ for (unsigned I = 0; I < 10; ++I)
+ EXPECT_EQ(Cond, analyzeAndPrintExitCondition(Code));
+}
+
+} // namespace clang::dataflow
More information about the cfe-commits
mailing list