[clang] 7d935d0 - [dataflow] improve determinism of generated SAT system

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Tue Jul 11 23:09:16 PDT 2023


Author: Sam McCall
Date: 2023-07-12T08:09:10+02:00
New Revision: 7d935d083659c9ddaf3389c1747fc8c59f07e3fc

URL: https://github.com/llvm/llvm-project/commit/7d935d083659c9ddaf3389c1747fc8c59f07e3fc
DIFF: https://github.com/llvm/llvm-project/commit/7d935d083659c9ddaf3389c1747fc8c59f07e3fc.diff

LOG: [dataflow] improve determinism of generated SAT system

Fixes two places where we relied on map iteration order when processing
values, which leaked nondeterminism into the generated SAT formulas.
Adds a couple of tests that directly assert that the SAT system is
equivalent on each run.

It's desirable that the formulas are deterministic based on the input:

 - our SAT solver is naive and perfermance is sensitive to even simple
   semantics-preserving transformations like A|B to B|A.
   (e.g. it's likely to choose a different variable to split on).
   Timeout failures are bad, but *flaky* ones are terrible to debug.
 - similarly when debugging, it's important to have a consistent
   understanding of what e.g. "V23" means across runs.

---

Both changes in this patch were isolated from a nullability analysis of
real-world code which was extremely slow, spending ages in the SAT
solver at "random" points that varied on each run.
I've included a reduced version of the code as a regression test.

One of the changes shows up directly as flow-condition nondeterminism
with a no-op analysis, the other relied on bits of the nullability
analysis but I found a synthetic example to show the problem.

Differential Revision: https://reviews.llvm.org/D154948

Added: 
    clang/unittests/Analysis/FlowSensitive/DeterminismTest.cpp

Modified: 
    clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
    clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
    clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
    clang/unittests/Analysis/FlowSensitive/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
index 512f66073da457..6fd4a085fa7a5f 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -28,6 +28,7 @@
 #include "clang/Analysis/FlowSensitive/Value.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <memory>
@@ -634,8 +635,9 @@ class Environment {
   // block.
   llvm::DenseMap<const ValueDecl *, StorageLocation *> DeclToLoc;
   llvm::DenseMap<const Expr *, StorageLocation *> ExprToLoc;
-
-  llvm::DenseMap<const StorageLocation *, Value *> LocToVal;
+  // We preserve insertion order so that join/widen process values in
+  // deterministic sequence. This in turn produces deterministic SAT formulas.
+  llvm::MapVector<const StorageLocation *, Value *> LocToVal;
 
   // Maps locations of struct members to symbolic values of the structs that own
   // them and the decls of the struct members.

diff  --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index 9b1fca41e911cc..3cf3c721763085 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -20,6 +20,7 @@
 #include "clang/Analysis/FlowSensitive/Value.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -505,7 +506,7 @@ LatticeJoinEffect Environment::widen(const Environment &PrevEnv,
   assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size());
   // assert(MemberLocToStruct.size() <= PrevEnv.MemberLocToStruct.size());
 
-  llvm::DenseMap<const StorageLocation *, Value *> WidenedLocToVal;
+  llvm::MapVector<const StorageLocation *, Value *> WidenedLocToVal;
   for (auto &Entry : LocToVal) {
     const StorageLocation *Loc = Entry.first;
     assert(Loc != nullptr);

diff  --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
index 9e48df1b994dc6..183cb32e5e7687 100644
--- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
+++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
@@ -275,8 +275,7 @@ class JoinedStateBuilder {
 ///   `std::nullopt` represent basic blocks that are not evaluated yet.
 static TypeErasedDataflowAnalysisState
 computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) {
-  llvm::DenseSet<const CFGBlock *> Preds;
-  Preds.insert(Block.pred_begin(), Block.pred_end());
+  std::vector<const CFGBlock *> Preds(Block.pred_begin(), Block.pred_end());
   if (Block.getTerminator().isTemporaryDtorsBranch()) {
     // This handles a special case where the code that produced the CFG includes
     // a conditional operator with a branch that constructs a temporary and
@@ -305,7 +304,7 @@ computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) {
       auto &StmtToBlock = AC.CFCtx.getStmtToBlock();
       auto StmtBlock = StmtToBlock.find(Block.getTerminatorStmt());
       assert(StmtBlock != StmtToBlock.end());
-      Preds.erase(StmtBlock->getSecond());
+      llvm::erase_value(Preds, StmtBlock->getSecond());
     }
   }
 

diff  --git a/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt
index 81f804d10bf7cd..a9c07d930cdd07 100644
--- a/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt
+++ b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt
@@ -10,6 +10,7 @@ add_clang_unittest(ClangAnalysisFlowSensitiveTests
   DataflowAnalysisContextTest.cpp
   DataflowEnvironmentTest.cpp
   DebugSupportTest.cpp
+  DeterminismTest.cpp
   LoggerTest.cpp
   MapLatticeTest.cpp
   MatchSwitchTest.cpp

diff  --git a/clang/unittests/Analysis/FlowSensitive/DeterminismTest.cpp b/clang/unittests/Analysis/FlowSensitive/DeterminismTest.cpp
new file mode 100644
index 00000000000000..5ba26a08320364
--- /dev/null
+++ b/clang/unittests/Analysis/FlowSensitive/DeterminismTest.cpp
@@ -0,0 +1,137 @@
+//===- unittests/Analysis/FlowSensitive/DeterminismTest.cpp ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TestingSupport.h"
+#include "clang/AST/Decl.h"
+#include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
+#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h"
+#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
+#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
+#include "clang/Analysis/FlowSensitive/Formula.h"
+#include "clang/Analysis/FlowSensitive/NoopAnalysis.h"
+#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
+#include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Testing/TestAST.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
+#include <memory>
+#include <string>
+
+namespace clang::dataflow {
+
+// Run a no-op analysis, and return a textual representation of the
+// flow-condition at function exit.
+std::string analyzeAndPrintExitCondition(llvm::StringRef Code) {
+  DataflowAnalysisContext DACtx(std::make_unique<WatchedLiteralsSolver>());
+  clang::TestAST AST(Code);
+  const auto *Target =
+      cast<FunctionDecl>(test::findValueDecl(AST.context(), "target"));
+  Environment InitEnv(DACtx, *Target);
+  auto CFCtx = cantFail(ControlFlowContext::build(*Target));
+
+  NoopAnalysis Analysis(AST.context(), DataflowAnalysisOptions{});
+
+  auto Result = runDataflowAnalysis(CFCtx, Analysis, InitEnv);
+  EXPECT_FALSE(!Result) << Result.takeError();
+
+  Atom FinalFC = (*Result)[CFCtx.getCFG().getExit().getBlockID()]
+                     ->Env.getFlowConditionToken();
+  std::string Textual;
+  llvm::raw_string_ostream OS(Textual);
+  DACtx.dumpFlowCondition(FinalFC, OS);
+  return Textual;
+}
+
+TEST(DeterminismTest, NestedSwitch) {
+  // Example extracted from real-world code that had wildly nondeterministic
+  // analysis times.
+  // Its flow condition depends on the order we join predecessor blocks.
+  const char *Code = R"cpp(
+    struct Tree;
+    struct Rep {
+      Tree *tree();
+      int length;
+    };
+    struct Tree {
+      int height();
+      Rep *edge(int);
+      int length;
+    };
+    struct RetVal {};
+    int getInt();
+    bool maybe();
+
+    RetVal make(int size);
+    inline RetVal target(int size, Tree& self) {
+      Tree* tree = &self;
+      const int height = self.height();
+      Tree* n1 = tree;
+      Tree* n2 = tree;
+      switch (height) {
+        case 3:
+          tree = tree->edge(0)->tree();
+          if (maybe()) return {};
+          n2 = tree;
+        case 2:
+          tree = tree->edge(0)->tree();
+          n1 = tree;
+          if (maybe()) return {};
+        case 1:
+          tree = tree->edge(0)->tree();
+          if (maybe()) return {};
+        case 0:
+          Rep* edge = tree->edge(0);
+          if (maybe()) return {};
+          int avail = getInt();
+          if (avail == 0) return {};
+          int delta = getInt();
+          RetVal span = {};
+          edge->length += delta;
+          switch (height) {
+            case 3:
+              n1->length += delta;
+            case 2:
+              n1->length += delta;
+            case 1:
+              n1->length += delta;
+            case 0:
+              n1->length += delta;
+              return span;
+          }
+          break;
+      }
+      return make(size);
+    }
+  )cpp";
+
+  std::string Cond = analyzeAndPrintExitCondition(Code);
+  for (unsigned I = 0; I < 10; ++I)
+    EXPECT_EQ(Cond, analyzeAndPrintExitCondition(Code));
+}
+
+TEST(DeterminismTest, ValueMergeOrder) {
+  // Artificial example whose final flow condition variable numbering depends
+  // on the order in which we merge a, b, and c.
+  const char *Code = R"cpp(
+    bool target(bool a, bool b, bool c) {
+      if (a)
+        b = c;
+      else
+        c = b;
+      return a && b && c;
+    }
+  )cpp";
+
+  std::string Cond = analyzeAndPrintExitCondition(Code);
+  for (unsigned I = 0; I < 10; ++I)
+    EXPECT_EQ(Cond, analyzeAndPrintExitCondition(Code));
+}
+
+} // namespace clang::dataflow


        


More information about the cfe-commits mailing list