[clang] [dataflow] Parse formulas from text (PR #66424)

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Thu Sep 14 12:39:38 PDT 2023


https://github.com/sam-mccall created https://github.com/llvm/llvm-project/pull/66424:

My immediate use for this is not in checked-in code, but rather the
ability to plug printed flow conditions (from analysis logs) back into
sat solver unittests to reproduce slowness.

It does allow simplifying some of the existing solver tests, though.


>From 35b33561af918fc2e13af9de18af6b181535ce49 Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall at gmail.com>
Date: Thu, 14 Sep 2023 21:36:50 +0200
Subject: [PATCH] [dataflow] Parse formulas from text

My immediate use for this is not in checked-in code, but rather the
ability to plug printed flow conditions (from analysis logs) back into
sat solver unittests to reproduce slowness.

It does allow simplifying some of the existing solver tests, though.
---
 .../clang/Analysis/FlowSensitive/Arena.h      |   5 +
 .../clang/Analysis/FlowSensitive/Formula.h    |   3 +
 clang/lib/Analysis/FlowSensitive/Arena.cpp    |  93 +++++++++++
 clang/lib/Analysis/FlowSensitive/Formula.cpp  |   1 +
 .../Analysis/FlowSensitive/ArenaTest.cpp      |  44 +++++
 .../Analysis/FlowSensitive/SolverTest.cpp     | 153 ++++++++----------
 6 files changed, 210 insertions(+), 89 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/Arena.h b/clang/include/clang/Analysis/FlowSensitive/Arena.h
index 373697dc7379c53..4e07053aae1af53 100644
--- a/clang/include/clang/Analysis/FlowSensitive/Arena.h
+++ b/clang/include/clang/Analysis/FlowSensitive/Arena.h
@@ -11,6 +11,7 @@
 #include "clang/Analysis/FlowSensitive/Formula.h"
 #include "clang/Analysis/FlowSensitive/StorageLocation.h"
 #include "clang/Analysis/FlowSensitive/Value.h"
+#include "llvm/ADT/StringRef.h"
 #include <vector>
 
 namespace clang::dataflow {
@@ -109,6 +110,10 @@ class Arena {
     return makeAtomRef(Value ? True : False);
   }
 
+  // Parses a formula from its textual representation.
+  // This may refer to atoms that were not produced by makeAtom() yet!
+  llvm::Expected<const Formula &> parseFormula(llvm::StringRef);
+
   /// Returns a new atomic boolean variable, distinct from any other.
   Atom makeAtom() { return static_cast<Atom>(NextAtom++); };
 
diff --git a/clang/include/clang/Analysis/FlowSensitive/Formula.h b/clang/include/clang/Analysis/FlowSensitive/Formula.h
index 64fe8f5b630a0f7..ac95a9ac7d50a1c 100644
--- a/clang/include/clang/Analysis/FlowSensitive/Formula.h
+++ b/clang/include/clang/Analysis/FlowSensitive/Formula.h
@@ -87,6 +87,9 @@ class alignas(const Formula *) Formula {
                          ArrayRef<const Formula *> Operands,
                          unsigned Value = 0);
 
+  // Parse Formulas using Arena rather than caling this function directly.
+  static Formula *parse(llvm::BumpPtrAllocator &Alloc, llvm::StringRef &);
+
 private:
   Formula() = default;
   Formula(const Formula &) = delete;
diff --git a/clang/lib/Analysis/FlowSensitive/Arena.cpp b/clang/lib/Analysis/FlowSensitive/Arena.cpp
index a12da2d9b555eb7..557feb373287632 100644
--- a/clang/lib/Analysis/FlowSensitive/Arena.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Arena.cpp
@@ -7,7 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Analysis/FlowSensitive/Arena.h"
+#include "clang/Analysis/FlowSensitive/Formula.h"
 #include "clang/Analysis/FlowSensitive/Value.h"
+#include "llvm/Support/Error.h"
+#include <string>
 
 namespace clang::dataflow {
 
@@ -95,4 +98,94 @@ BoolValue &Arena::makeBoolValue(const Formula &F) {
   return *It->second;
 }
 
+namespace {
+const Formula *parse(Arena &A, llvm::StringRef &In) {
+  auto EatWhitespace = [&] { In = In.ltrim(' '); };
+  EatWhitespace();
+
+  if (In.consume_front("!")) {
+    if (auto *Arg = parse(A, In))
+      return &A.makeNot(*Arg);
+    return nullptr;
+  }
+
+  if (In.consume_front("(")) {
+    auto *Arg1 = parse(A, In);
+    if (!Arg1)
+      return nullptr;
+
+    EatWhitespace();
+    decltype(&Arena::makeOr) Op;
+    if (In.consume_front("|"))
+      Op = &Arena::makeOr;
+    else if (In.consume_front("&"))
+      Op = &Arena::makeAnd;
+    else if (In.consume_front("=>"))
+      Op = &Arena::makeImplies;
+    else if (In.consume_front("="))
+      Op = &Arena::makeEquals;
+    else
+      return nullptr;
+
+    auto *Arg2 = parse(A, In);
+    if (!Arg2)
+      return nullptr;
+
+    EatWhitespace();
+    if (!In.consume_front(")"))
+      return nullptr;
+
+    return &(A.*Op)(*Arg1, *Arg2);
+  }
+
+  if (In.consume_front("V")) {
+    std::underlying_type_t<Atom> At;
+    if (In.consumeInteger(10, At))
+      return nullptr;
+    return &A.makeAtomRef(static_cast<Atom>(At));
+  }
+
+  if (In.consume_front("true"))
+    return &A.makeLiteral(true);
+  if (In.consume_front("false"))
+    return &A.makeLiteral(false);
+
+  return nullptr;
+}
+
+class FormulaParseError : public llvm::ErrorInfo<FormulaParseError> {
+  unsigned Offset;
+  std::string Formula;
+
+public:
+  static char ID;
+  FormulaParseError(llvm::StringRef Formula, unsigned Offset)
+      : Offset(Offset), Formula(Formula) {}
+
+  void log(raw_ostream &OS) const override {
+    OS << "bad formula at offset " << Offset << "\n";
+    OS << Formula << "\n";
+    OS.indent(Offset) << "^";
+  }
+
+  std::error_code convertToErrorCode() const override {
+    return std::make_error_code(std::errc::invalid_argument);
+  }
+};
+
+char FormulaParseError::ID = 0;
+
+} // namespace
+
+llvm::Expected<const Formula &> Arena::parseFormula(llvm::StringRef In) {
+  llvm::StringRef Rest = In;
+  auto *Result = parse(*this, Rest);
+  if (!Result) // parse() hit something unparseable
+    return llvm::make_error<FormulaParseError>(In, In.size() - Rest.size());
+  Rest = Rest.ltrim();
+  if (!Rest.empty()) // parse didn't consume all the input
+    return llvm::make_error<FormulaParseError>(In, In.size() - Rest.size());
+  return *Result;
+}
+
 } // namespace clang::dataflow
diff --git a/clang/lib/Analysis/FlowSensitive/Formula.cpp b/clang/lib/Analysis/FlowSensitive/Formula.cpp
index 504ad2fb7938aff..6d22efc5db07be4 100644
--- a/clang/lib/Analysis/FlowSensitive/Formula.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Formula.cpp
@@ -13,6 +13,7 @@
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <cassert>
+#include <type_traits>
 
 namespace clang::dataflow {
 
diff --git a/clang/unittests/Analysis/FlowSensitive/ArenaTest.cpp b/clang/unittests/Analysis/FlowSensitive/ArenaTest.cpp
index 0f8552a58787cb4..1fc2ed2c813addc 100644
--- a/clang/unittests/Analysis/FlowSensitive/ArenaTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/ArenaTest.cpp
@@ -8,11 +8,14 @@
 
 #include "clang/Analysis/FlowSensitive/Arena.h"
 #include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Testing/Support/Error.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
 namespace clang::dataflow {
 namespace {
+using llvm::HasValue;
+using testing::Ref;
 
 class ArenaTest : public ::testing::Test {
 protected:
@@ -137,5 +140,46 @@ TEST_F(ArenaTest, Interning) {
   EXPECT_EQ(&B1.formula(), &F1);
 }
 
+TEST_F(ArenaTest, Parse) {
+  Atom V5{5};
+  Atom V6{6};
+  EXPECT_THAT_EXPECTED(A.parseFormula("V5"), HasValue(Ref(A.makeAtomRef(V5))));
+  EXPECT_THAT_EXPECTED(A.parseFormula("true"),
+                       HasValue(Ref(A.makeLiteral(true))));
+  EXPECT_THAT_EXPECTED(A.parseFormula("!V5"),
+                       HasValue(Ref(A.makeNot(A.makeAtomRef(V5)))));
+
+  EXPECT_THAT_EXPECTED(
+      A.parseFormula("(V5 = V6)"),
+      HasValue(Ref(A.makeEquals(A.makeAtomRef(V5), A.makeAtomRef(V6)))));
+  EXPECT_THAT_EXPECTED(
+      A.parseFormula("(V5 => V6)"),
+      HasValue(Ref(A.makeImplies(A.makeAtomRef(V5), A.makeAtomRef(V6)))));
+  EXPECT_THAT_EXPECTED(
+      A.parseFormula("(V5 & V6)"),
+      HasValue(Ref(A.makeAnd(A.makeAtomRef(V5), A.makeAtomRef(V6)))));
+  EXPECT_THAT_EXPECTED(
+      A.parseFormula("(V5 | V6)"),
+      HasValue(Ref(A.makeOr(A.makeAtomRef(V5), A.makeAtomRef(V6)))));
+
+  EXPECT_THAT_EXPECTED(
+      A.parseFormula("((V5 & (V6 & !false)) => ((V5 | V6) | false))"),
+      HasValue(Ref(
+          A.makeImplies(A.makeAnd(A.makeAtomRef(V5),
+                                  A.makeAnd(A.makeAtomRef(V6),
+                                            A.makeNot(A.makeLiteral(false)))),
+                        A.makeOr(A.makeOr(A.makeAtomRef(V5), A.makeAtomRef(V6)),
+                                 A.makeLiteral(false))))));
+
+  EXPECT_THAT_EXPECTED(
+      A.parseFormula("(V0 => error)"), llvm::FailedWithMessage(R"(bad formula at offset 7
+(V0 => error)
+       ^)"));
+  EXPECT_THAT_EXPECTED(
+      A.parseFormula("V1 V2"), llvm::FailedWithMessage(R"(bad formula at offset 3
+V1 V2
+   ^)"));
+}
+
 } // namespace
 } // namespace clang::dataflow
diff --git a/clang/unittests/Analysis/FlowSensitive/SolverTest.cpp b/clang/unittests/Analysis/FlowSensitive/SolverTest.cpp
index e168f0ef1378e45..a61e692088a8717 100644
--- a/clang/unittests/Analysis/FlowSensitive/SolverTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/SolverTest.cpp
@@ -9,12 +9,15 @@
 #include <utility>
 
 #include "TestingSupport.h"
+#include "clang/Analysis/FlowSensitive/Arena.h"
 #include "clang/Analysis/FlowSensitive/Formula.h"
 #include "clang/Analysis/FlowSensitive/Solver.h"
 #include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h"
+#include "clang/Basic/LLVM.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
+#include <vector>
 
 namespace {
 
@@ -30,6 +33,21 @@ using testing::UnorderedElementsAre;
 constexpr auto AssignedTrue = Solver::Result::Assignment::AssignedTrue;
 constexpr auto AssignedFalse = Solver::Result::Assignment::AssignedFalse;
 
+std::vector<const Formula *> parseAll(Arena &A, StringRef Lines) {
+  std::vector<const Formula *> Result;
+  while (!Lines.empty()) {
+    auto [First, Rest] = Lines.split('\n');
+    Lines = Rest;
+    if (First.trim().empty())
+      continue;
+    if (auto F = A.parseFormula(First))
+      Result.push_back(&*F);
+    else
+      ADD_FAILURE() << llvm::toString(F.takeError());
+  }
+  return Result;
+}
+
 // Checks if the conjunction of `Vals` is satisfiable and returns the
 // corresponding result.
 Solver::Result solve(llvm::ArrayRef<const Formula *> Vals) {
@@ -258,114 +276,71 @@ TEST(SolverTest, IffWithUnits) {
 }
 
 TEST(SolverTest, IffWithUnitsConflict) {
-  ConstraintContext Ctx;
-  auto X = Ctx.atom();
-  auto Y = Ctx.atom();
-  auto XEqY = Ctx.iff(X, Y);
-  auto NotY = Ctx.neg(Y);
-
-  // (X <=> Y) ^ X  !Y
-  EXPECT_THAT(solve({XEqY, X, NotY}), unsat());
+  Arena A;
+  auto Constraints = parseAll(A, R"(
+     (V0 = V1)
+     V0
+     !V1
+  )");
+  EXPECT_THAT(solve(Constraints), unsat());
 }
 
 TEST(SolverTest, IffTransitiveConflict) {
-  ConstraintContext Ctx;
-  auto X = Ctx.atom();
-  auto Y = Ctx.atom();
-  auto Z = Ctx.atom();
-  auto XEqY = Ctx.iff(X, Y);
-  auto YEqZ = Ctx.iff(Y, Z);
-  auto NotX = Ctx.neg(X);
-
-  // (X <=> Y) ^ (Y <=> Z) ^ Z ^ !X
-  EXPECT_THAT(solve({XEqY, YEqZ, Z, NotX}), unsat());
+  Arena A;
+  auto Constraints = parseAll(A, R"(
+     (V0 = V1)
+     (V1 = V2)
+     V2
+     !V0
+  )");
+  EXPECT_THAT(solve(Constraints), unsat());
 }
 
 TEST(SolverTest, DeMorgan) {
-  ConstraintContext Ctx;
-  auto X = Ctx.atom();
-  auto Y = Ctx.atom();
-  auto Z = Ctx.atom();
-  auto W = Ctx.atom();
-
-  // !(X v Y) <=> !X ^ !Y
-  auto A = Ctx.iff(Ctx.neg(Ctx.disj(X, Y)), Ctx.conj(Ctx.neg(X), Ctx.neg(Y)));
-
-  // !(Z ^ W) <=> !Z v !W
-  auto B = Ctx.iff(Ctx.neg(Ctx.conj(Z, W)), Ctx.disj(Ctx.neg(Z), Ctx.neg(W)));
-
-  // A ^ B
-  EXPECT_THAT(solve({A, B}), sat(_));
+  Arena A;
+  auto Constraints = parseAll(A, R"(
+     (!(V0 | V1) = (!V0 & !V1))
+     (!(V2 & V3) = (!V2 | !V3))
+  )");
+  EXPECT_THAT(solve(Constraints), sat(_));
 }
 
 TEST(SolverTest, RespectsAdditionalConstraints) {
-  ConstraintContext Ctx;
-  auto X = Ctx.atom();
-  auto Y = Ctx.atom();
-  auto XEqY = Ctx.iff(X, Y);
-  auto NotY = Ctx.neg(Y);
-
-  // (X <=> Y) ^ X ^ !Y
-  EXPECT_THAT(solve({XEqY, X, NotY}), unsat());
+  Arena A;
+  auto Constraints = parseAll(A, R"(
+     (V0 = V1)
+     V0
+     !V1
+  )");
+  EXPECT_THAT(solve(Constraints), unsat());
 }
 
 TEST(SolverTest, ImplicationIsEquivalentToDNF) {
-  ConstraintContext Ctx;
-  auto X = Ctx.atom();
-  auto Y = Ctx.atom();
-  auto XImpliesY = Ctx.impl(X, Y);
-  auto XImpliesYDNF = Ctx.disj(Ctx.neg(X), Y);
-  auto NotEquivalent = Ctx.neg(Ctx.iff(XImpliesY, XImpliesYDNF));
-
-  // !((X => Y) <=> (!X v Y))
-  EXPECT_THAT(solve({NotEquivalent}), unsat());
+  Arena A;
+  auto Constraints = parseAll(A, R"(
+     !((V0 => V1) = (!V0 | V1))
+  )");
+  EXPECT_THAT(solve(Constraints), unsat());
 }
 
 TEST(SolverTest, ImplicationConflict) {
-  ConstraintContext Ctx;
-  auto X = Ctx.atom();
-  auto Y = Ctx.atom();
-  auto *XImplY = Ctx.impl(X, Y);
-  auto *XAndNotY = Ctx.conj(X, Ctx.neg(Y));
-
-  // X => Y ^ X ^ !Y
-  EXPECT_THAT(solve({XImplY, XAndNotY}), unsat());
-}
-
-TEST(SolverTest, LowTimeoutResultsInTimedOut) {
-  WatchedLiteralsSolver solver(10);
-  ConstraintContext Ctx;
-  auto X = Ctx.atom();
-  auto Y = Ctx.atom();
-  auto Z = Ctx.atom();
-  auto W = Ctx.atom();
-
-  // !(X v Y) <=> !X ^ !Y
-  auto A = Ctx.iff(Ctx.neg(Ctx.disj(X, Y)), Ctx.conj(Ctx.neg(X), Ctx.neg(Y)));
-
-  // !(Z ^ W) <=> !Z v !W
-  auto B = Ctx.iff(Ctx.neg(Ctx.conj(Z, W)), Ctx.disj(Ctx.neg(Z), Ctx.neg(W)));
-
-  // A ^ B
-  EXPECT_EQ(solver.solve({A, B}).getStatus(), Solver::Result::Status::TimedOut);
+  Arena A;
+  auto Constraints = parseAll(A, R"(
+     (V0 => V1)
+     (V0 & !V1)
+  )");
+  EXPECT_THAT(solve(Constraints), unsat());
 }
 
 TEST(SolverTest, ReachedLimitsReflectsTimeouts) {
+  Arena A;
+  auto Constraints = parseAll(A, R"(
+     (!(V0 | V1) = (!V0 & !V1))
+     (!(V2 & V3) = (!V2 & !V3))
+  )");
   WatchedLiteralsSolver solver(10);
-  ConstraintContext Ctx;
-  auto X = Ctx.atom();
-  auto Y = Ctx.atom();
-  auto Z = Ctx.atom();
-  auto W = Ctx.atom();
-
-  // !(X v Y) <=> !X ^ !Y
-  auto A = Ctx.iff(Ctx.neg(Ctx.disj(X, Y)), Ctx.conj(Ctx.neg(X), Ctx.neg(Y)));
-
-  // !(Z ^ W) <=> !Z v !W
-  auto B = Ctx.iff(Ctx.neg(Ctx.conj(Z, W)), Ctx.disj(Ctx.neg(Z), Ctx.neg(W)));
-
-  // A ^ B
-  ASSERT_EQ(solver.solve({A, B}).getStatus(), Solver::Result::Status::TimedOut);
+  ASSERT_EQ(solver.solve(Constraints).getStatus(),
+            Solver::Result::Status::TimedOut);
   EXPECT_TRUE(solver.reachedLimit());
 }
 



More information about the cfe-commits mailing list