[clang] [dataflow] Parse formulas from text (PR #66424)

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Tue Sep 19 02:54:51 PDT 2023


https://github.com/sam-mccall updated https://github.com/llvm/llvm-project/pull/66424

>From 35b33561af918fc2e13af9de18af6b181535ce49 Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall at gmail.com>
Date: Thu, 14 Sep 2023 21:36:50 +0200
Subject: [PATCH 1/2] [dataflow] Parse formulas from text

My immediate use for this is not in checked-in code, but rather the
ability to plug printed flow conditions (from analysis logs) back into
sat solver unittests to reproduce slowness.

It does allow simplifying some of the existing solver tests, though.
---
 .../clang/Analysis/FlowSensitive/Arena.h      |   5 +
 .../clang/Analysis/FlowSensitive/Formula.h    |   3 +
 clang/lib/Analysis/FlowSensitive/Arena.cpp    |  93 +++++++++++
 clang/lib/Analysis/FlowSensitive/Formula.cpp  |   1 +
 .../Analysis/FlowSensitive/ArenaTest.cpp      |  44 +++++
 .../Analysis/FlowSensitive/SolverTest.cpp     | 153 ++++++++----------
 6 files changed, 210 insertions(+), 89 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/Arena.h b/clang/include/clang/Analysis/FlowSensitive/Arena.h
index 373697dc7379c53..4e07053aae1af53 100644
--- a/clang/include/clang/Analysis/FlowSensitive/Arena.h
+++ b/clang/include/clang/Analysis/FlowSensitive/Arena.h
@@ -11,6 +11,7 @@
 #include "clang/Analysis/FlowSensitive/Formula.h"
 #include "clang/Analysis/FlowSensitive/StorageLocation.h"
 #include "clang/Analysis/FlowSensitive/Value.h"
+#include "llvm/ADT/StringRef.h"
 #include <vector>
 
 namespace clang::dataflow {
@@ -109,6 +110,10 @@ class Arena {
     return makeAtomRef(Value ? True : False);
   }
 
+  // Parses a formula from its textual representation.
+  // This may refer to atoms that were not produced by makeAtom() yet!
+  llvm::Expected<const Formula &> parseFormula(llvm::StringRef);
+
   /// Returns a new atomic boolean variable, distinct from any other.
   Atom makeAtom() { return static_cast<Atom>(NextAtom++); };
 
diff --git a/clang/include/clang/Analysis/FlowSensitive/Formula.h b/clang/include/clang/Analysis/FlowSensitive/Formula.h
index 64fe8f5b630a0f7..ac95a9ac7d50a1c 100644
--- a/clang/include/clang/Analysis/FlowSensitive/Formula.h
+++ b/clang/include/clang/Analysis/FlowSensitive/Formula.h
@@ -87,6 +87,9 @@ class alignas(const Formula *) Formula {
                          ArrayRef<const Formula *> Operands,
                          unsigned Value = 0);
 
+  // Parse Formulas using Arena rather than caling this function directly.
+  static Formula *parse(llvm::BumpPtrAllocator &Alloc, llvm::StringRef &);
+
 private:
   Formula() = default;
   Formula(const Formula &) = delete;
diff --git a/clang/lib/Analysis/FlowSensitive/Arena.cpp b/clang/lib/Analysis/FlowSensitive/Arena.cpp
index a12da2d9b555eb7..557feb373287632 100644
--- a/clang/lib/Analysis/FlowSensitive/Arena.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Arena.cpp
@@ -7,7 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Analysis/FlowSensitive/Arena.h"
+#include "clang/Analysis/FlowSensitive/Formula.h"
 #include "clang/Analysis/FlowSensitive/Value.h"
+#include "llvm/Support/Error.h"
+#include <string>
 
 namespace clang::dataflow {
 
@@ -95,4 +98,94 @@ BoolValue &Arena::makeBoolValue(const Formula &F) {
   return *It->second;
 }
 
+namespace {
+const Formula *parse(Arena &A, llvm::StringRef &In) {
+  auto EatWhitespace = [&] { In = In.ltrim(' '); };
+  EatWhitespace();
+
+  if (In.consume_front("!")) {
+    if (auto *Arg = parse(A, In))
+      return &A.makeNot(*Arg);
+    return nullptr;
+  }
+
+  if (In.consume_front("(")) {
+    auto *Arg1 = parse(A, In);
+    if (!Arg1)
+      return nullptr;
+
+    EatWhitespace();
+    decltype(&Arena::makeOr) Op;
+    if (In.consume_front("|"))
+      Op = &Arena::makeOr;
+    else if (In.consume_front("&"))
+      Op = &Arena::makeAnd;
+    else if (In.consume_front("=>"))
+      Op = &Arena::makeImplies;
+    else if (In.consume_front("="))
+      Op = &Arena::makeEquals;
+    else
+      return nullptr;
+
+    auto *Arg2 = parse(A, In);
+    if (!Arg2)
+      return nullptr;
+
+    EatWhitespace();
+    if (!In.consume_front(")"))
+      return nullptr;
+
+    return &(A.*Op)(*Arg1, *Arg2);
+  }
+
+  if (In.consume_front("V")) {
+    std::underlying_type_t<Atom> At;
+    if (In.consumeInteger(10, At))
+      return nullptr;
+    return &A.makeAtomRef(static_cast<Atom>(At));
+  }
+
+  if (In.consume_front("true"))
+    return &A.makeLiteral(true);
+  if (In.consume_front("false"))
+    return &A.makeLiteral(false);
+
+  return nullptr;
+}
+
+class FormulaParseError : public llvm::ErrorInfo<FormulaParseError> {
+  unsigned Offset;
+  std::string Formula;
+
+public:
+  static char ID;
+  FormulaParseError(llvm::StringRef Formula, unsigned Offset)
+      : Offset(Offset), Formula(Formula) {}
+
+  void log(raw_ostream &OS) const override {
+    OS << "bad formula at offset " << Offset << "\n";
+    OS << Formula << "\n";
+    OS.indent(Offset) << "^";
+  }
+
+  std::error_code convertToErrorCode() const override {
+    return std::make_error_code(std::errc::invalid_argument);
+  }
+};
+
+char FormulaParseError::ID = 0;
+
+} // namespace
+
+llvm::Expected<const Formula &> Arena::parseFormula(llvm::StringRef In) {
+  llvm::StringRef Rest = In;
+  auto *Result = parse(*this, Rest);
+  if (!Result) // parse() hit something unparseable
+    return llvm::make_error<FormulaParseError>(In, In.size() - Rest.size());
+  Rest = Rest.ltrim();
+  if (!Rest.empty()) // parse didn't consume all the input
+    return llvm::make_error<FormulaParseError>(In, In.size() - Rest.size());
+  return *Result;
+}
+
 } // namespace clang::dataflow
diff --git a/clang/lib/Analysis/FlowSensitive/Formula.cpp b/clang/lib/Analysis/FlowSensitive/Formula.cpp
index 504ad2fb7938aff..6d22efc5db07be4 100644
--- a/clang/lib/Analysis/FlowSensitive/Formula.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Formula.cpp
@@ -13,6 +13,7 @@
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <cassert>
+#include <type_traits>
 
 namespace clang::dataflow {
 
diff --git a/clang/unittests/Analysis/FlowSensitive/ArenaTest.cpp b/clang/unittests/Analysis/FlowSensitive/ArenaTest.cpp
index 0f8552a58787cb4..1fc2ed2c813addc 100644
--- a/clang/unittests/Analysis/FlowSensitive/ArenaTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/ArenaTest.cpp
@@ -8,11 +8,14 @@
 
 #include "clang/Analysis/FlowSensitive/Arena.h"
 #include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Testing/Support/Error.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
 namespace clang::dataflow {
 namespace {
+using llvm::HasValue;
+using testing::Ref;
 
 class ArenaTest : public ::testing::Test {
 protected:
@@ -137,5 +140,46 @@ TEST_F(ArenaTest, Interning) {
   EXPECT_EQ(&B1.formula(), &F1);
 }
 
+TEST_F(ArenaTest, Parse) {
+  Atom V5{5};
+  Atom V6{6};
+  EXPECT_THAT_EXPECTED(A.parseFormula("V5"), HasValue(Ref(A.makeAtomRef(V5))));
+  EXPECT_THAT_EXPECTED(A.parseFormula("true"),
+                       HasValue(Ref(A.makeLiteral(true))));
+  EXPECT_THAT_EXPECTED(A.parseFormula("!V5"),
+                       HasValue(Ref(A.makeNot(A.makeAtomRef(V5)))));
+
+  EXPECT_THAT_EXPECTED(
+      A.parseFormula("(V5 = V6)"),
+      HasValue(Ref(A.makeEquals(A.makeAtomRef(V5), A.makeAtomRef(V6)))));
+  EXPECT_THAT_EXPECTED(
+      A.parseFormula("(V5 => V6)"),
+      HasValue(Ref(A.makeImplies(A.makeAtomRef(V5), A.makeAtomRef(V6)))));
+  EXPECT_THAT_EXPECTED(
+      A.parseFormula("(V5 & V6)"),
+      HasValue(Ref(A.makeAnd(A.makeAtomRef(V5), A.makeAtomRef(V6)))));
+  EXPECT_THAT_EXPECTED(
+      A.parseFormula("(V5 | V6)"),
+      HasValue(Ref(A.makeOr(A.makeAtomRef(V5), A.makeAtomRef(V6)))));
+
+  EXPECT_THAT_EXPECTED(
+      A.parseFormula("((V5 & (V6 & !false)) => ((V5 | V6) | false))"),
+      HasValue(Ref(
+          A.makeImplies(A.makeAnd(A.makeAtomRef(V5),
+                                  A.makeAnd(A.makeAtomRef(V6),
+                                            A.makeNot(A.makeLiteral(false)))),
+                        A.makeOr(A.makeOr(A.makeAtomRef(V5), A.makeAtomRef(V6)),
+                                 A.makeLiteral(false))))));
+
+  EXPECT_THAT_EXPECTED(
+      A.parseFormula("(V0 => error)"), llvm::FailedWithMessage(R"(bad formula at offset 7
+(V0 => error)
+       ^)"));
+  EXPECT_THAT_EXPECTED(
+      A.parseFormula("V1 V2"), llvm::FailedWithMessage(R"(bad formula at offset 3
+V1 V2
+   ^)"));
+}
+
 } // namespace
 } // namespace clang::dataflow
diff --git a/clang/unittests/Analysis/FlowSensitive/SolverTest.cpp b/clang/unittests/Analysis/FlowSensitive/SolverTest.cpp
index e168f0ef1378e45..a61e692088a8717 100644
--- a/clang/unittests/Analysis/FlowSensitive/SolverTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/SolverTest.cpp
@@ -9,12 +9,15 @@
 #include <utility>
 
 #include "TestingSupport.h"
+#include "clang/Analysis/FlowSensitive/Arena.h"
 #include "clang/Analysis/FlowSensitive/Formula.h"
 #include "clang/Analysis/FlowSensitive/Solver.h"
 #include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h"
+#include "clang/Basic/LLVM.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
+#include <vector>
 
 namespace {
 
@@ -30,6 +33,21 @@ using testing::UnorderedElementsAre;
 constexpr auto AssignedTrue = Solver::Result::Assignment::AssignedTrue;
 constexpr auto AssignedFalse = Solver::Result::Assignment::AssignedFalse;
 
+std::vector<const Formula *> parseAll(Arena &A, StringRef Lines) {
+  std::vector<const Formula *> Result;
+  while (!Lines.empty()) {
+    auto [First, Rest] = Lines.split('\n');
+    Lines = Rest;
+    if (First.trim().empty())
+      continue;
+    if (auto F = A.parseFormula(First))
+      Result.push_back(&*F);
+    else
+      ADD_FAILURE() << llvm::toString(F.takeError());
+  }
+  return Result;
+}
+
 // Checks if the conjunction of `Vals` is satisfiable and returns the
 // corresponding result.
 Solver::Result solve(llvm::ArrayRef<const Formula *> Vals) {
@@ -258,114 +276,71 @@ TEST(SolverTest, IffWithUnits) {
 }
 
 TEST(SolverTest, IffWithUnitsConflict) {
-  ConstraintContext Ctx;
-  auto X = Ctx.atom();
-  auto Y = Ctx.atom();
-  auto XEqY = Ctx.iff(X, Y);
-  auto NotY = Ctx.neg(Y);
-
-  // (X <=> Y) ^ X  !Y
-  EXPECT_THAT(solve({XEqY, X, NotY}), unsat());
+  Arena A;
+  auto Constraints = parseAll(A, R"(
+     (V0 = V1)
+     V0
+     !V1
+  )");
+  EXPECT_THAT(solve(Constraints), unsat());
 }
 
 TEST(SolverTest, IffTransitiveConflict) {
-  ConstraintContext Ctx;
-  auto X = Ctx.atom();
-  auto Y = Ctx.atom();
-  auto Z = Ctx.atom();
-  auto XEqY = Ctx.iff(X, Y);
-  auto YEqZ = Ctx.iff(Y, Z);
-  auto NotX = Ctx.neg(X);
-
-  // (X <=> Y) ^ (Y <=> Z) ^ Z ^ !X
-  EXPECT_THAT(solve({XEqY, YEqZ, Z, NotX}), unsat());
+  Arena A;
+  auto Constraints = parseAll(A, R"(
+     (V0 = V1)
+     (V1 = V2)
+     V2
+     !V0
+  )");
+  EXPECT_THAT(solve(Constraints), unsat());
 }
 
 TEST(SolverTest, DeMorgan) {
-  ConstraintContext Ctx;
-  auto X = Ctx.atom();
-  auto Y = Ctx.atom();
-  auto Z = Ctx.atom();
-  auto W = Ctx.atom();
-
-  // !(X v Y) <=> !X ^ !Y
-  auto A = Ctx.iff(Ctx.neg(Ctx.disj(X, Y)), Ctx.conj(Ctx.neg(X), Ctx.neg(Y)));
-
-  // !(Z ^ W) <=> !Z v !W
-  auto B = Ctx.iff(Ctx.neg(Ctx.conj(Z, W)), Ctx.disj(Ctx.neg(Z), Ctx.neg(W)));
-
-  // A ^ B
-  EXPECT_THAT(solve({A, B}), sat(_));
+  Arena A;
+  auto Constraints = parseAll(A, R"(
+     (!(V0 | V1) = (!V0 & !V1))
+     (!(V2 & V3) = (!V2 | !V3))
+  )");
+  EXPECT_THAT(solve(Constraints), sat(_));
 }
 
 TEST(SolverTest, RespectsAdditionalConstraints) {
-  ConstraintContext Ctx;
-  auto X = Ctx.atom();
-  auto Y = Ctx.atom();
-  auto XEqY = Ctx.iff(X, Y);
-  auto NotY = Ctx.neg(Y);
-
-  // (X <=> Y) ^ X ^ !Y
-  EXPECT_THAT(solve({XEqY, X, NotY}), unsat());
+  Arena A;
+  auto Constraints = parseAll(A, R"(
+     (V0 = V1)
+     V0
+     !V1
+  )");
+  EXPECT_THAT(solve(Constraints), unsat());
 }
 
 TEST(SolverTest, ImplicationIsEquivalentToDNF) {
-  ConstraintContext Ctx;
-  auto X = Ctx.atom();
-  auto Y = Ctx.atom();
-  auto XImpliesY = Ctx.impl(X, Y);
-  auto XImpliesYDNF = Ctx.disj(Ctx.neg(X), Y);
-  auto NotEquivalent = Ctx.neg(Ctx.iff(XImpliesY, XImpliesYDNF));
-
-  // !((X => Y) <=> (!X v Y))
-  EXPECT_THAT(solve({NotEquivalent}), unsat());
+  Arena A;
+  auto Constraints = parseAll(A, R"(
+     !((V0 => V1) = (!V0 | V1))
+  )");
+  EXPECT_THAT(solve(Constraints), unsat());
 }
 
 TEST(SolverTest, ImplicationConflict) {
-  ConstraintContext Ctx;
-  auto X = Ctx.atom();
-  auto Y = Ctx.atom();
-  auto *XImplY = Ctx.impl(X, Y);
-  auto *XAndNotY = Ctx.conj(X, Ctx.neg(Y));
-
-  // X => Y ^ X ^ !Y
-  EXPECT_THAT(solve({XImplY, XAndNotY}), unsat());
-}
-
-TEST(SolverTest, LowTimeoutResultsInTimedOut) {
-  WatchedLiteralsSolver solver(10);
-  ConstraintContext Ctx;
-  auto X = Ctx.atom();
-  auto Y = Ctx.atom();
-  auto Z = Ctx.atom();
-  auto W = Ctx.atom();
-
-  // !(X v Y) <=> !X ^ !Y
-  auto A = Ctx.iff(Ctx.neg(Ctx.disj(X, Y)), Ctx.conj(Ctx.neg(X), Ctx.neg(Y)));
-
-  // !(Z ^ W) <=> !Z v !W
-  auto B = Ctx.iff(Ctx.neg(Ctx.conj(Z, W)), Ctx.disj(Ctx.neg(Z), Ctx.neg(W)));
-
-  // A ^ B
-  EXPECT_EQ(solver.solve({A, B}).getStatus(), Solver::Result::Status::TimedOut);
+  Arena A;
+  auto Constraints = parseAll(A, R"(
+     (V0 => V1)
+     (V0 & !V1)
+  )");
+  EXPECT_THAT(solve(Constraints), unsat());
 }
 
 TEST(SolverTest, ReachedLimitsReflectsTimeouts) {
+  Arena A;
+  auto Constraints = parseAll(A, R"(
+     (!(V0 | V1) = (!V0 & !V1))
+     (!(V2 & V3) = (!V2 & !V3))
+  )");
   WatchedLiteralsSolver solver(10);
-  ConstraintContext Ctx;
-  auto X = Ctx.atom();
-  auto Y = Ctx.atom();
-  auto Z = Ctx.atom();
-  auto W = Ctx.atom();
-
-  // !(X v Y) <=> !X ^ !Y
-  auto A = Ctx.iff(Ctx.neg(Ctx.disj(X, Y)), Ctx.conj(Ctx.neg(X), Ctx.neg(Y)));
-
-  // !(Z ^ W) <=> !Z v !W
-  auto B = Ctx.iff(Ctx.neg(Ctx.conj(Z, W)), Ctx.disj(Ctx.neg(Z), Ctx.neg(W)));
-
-  // A ^ B
-  ASSERT_EQ(solver.solve({A, B}).getStatus(), Solver::Result::Status::TimedOut);
+  ASSERT_EQ(solver.solve(Constraints).getStatus(),
+            Solver::Result::Status::TimedOut);
   EXPECT_TRUE(solver.reachedLimit());
 }
 

>From 2942016da0f38a8a5f8175d9dc98cacf211f1ce5 Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall at gmail.com>
Date: Tue, 19 Sep 2023 11:53:45 +0200
Subject: [PATCH 2/2] Address review comments

Drop leftover parse() decl from header.
EatWhitespace -> EatSpaces
FIXME about general var names
Style nits
---
 .../include/clang/Analysis/FlowSensitive/Formula.h |  3 ---
 clang/lib/Analysis/FlowSensitive/Arena.cpp         | 14 ++++++++------
 .../unittests/Analysis/FlowSensitive/ArenaTest.cpp |  2 +-
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/Formula.h b/clang/include/clang/Analysis/FlowSensitive/Formula.h
index ac95a9ac7d50a1c..64fe8f5b630a0f7 100644
--- a/clang/include/clang/Analysis/FlowSensitive/Formula.h
+++ b/clang/include/clang/Analysis/FlowSensitive/Formula.h
@@ -87,9 +87,6 @@ class alignas(const Formula *) Formula {
                          ArrayRef<const Formula *> Operands,
                          unsigned Value = 0);
 
-  // Parse Formulas using Arena rather than caling this function directly.
-  static Formula *parse(llvm::BumpPtrAllocator &Alloc, llvm::StringRef &);
-
 private:
   Formula() = default;
   Formula(const Formula &) = delete;
diff --git a/clang/lib/Analysis/FlowSensitive/Arena.cpp b/clang/lib/Analysis/FlowSensitive/Arena.cpp
index 557feb373287632..b043a52b609df36 100644
--- a/clang/lib/Analysis/FlowSensitive/Arena.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Arena.cpp
@@ -100,8 +100,8 @@ BoolValue &Arena::makeBoolValue(const Formula &F) {
 
 namespace {
 const Formula *parse(Arena &A, llvm::StringRef &In) {
-  auto EatWhitespace = [&] { In = In.ltrim(' '); };
-  EatWhitespace();
+  auto EatSpaces = [&] { In = In.ltrim(' '); };
+  EatSpaces();
 
   if (In.consume_front("!")) {
     if (auto *Arg = parse(A, In))
@@ -114,7 +114,7 @@ const Formula *parse(Arena &A, llvm::StringRef &In) {
     if (!Arg1)
       return nullptr;
 
-    EatWhitespace();
+    EatSpaces();
     decltype(&Arena::makeOr) Op;
     if (In.consume_front("|"))
       Op = &Arena::makeOr;
@@ -131,13 +131,15 @@ const Formula *parse(Arena &A, llvm::StringRef &In) {
     if (!Arg2)
       return nullptr;
 
-    EatWhitespace();
+    EatSpaces();
     if (!In.consume_front(")"))
       return nullptr;
 
     return &(A.*Op)(*Arg1, *Arg2);
   }
 
+  // For now, only support unnamed variables V0, V1 etc.
+  // FIXME: parse e.g. "X" by allocating an atom and storing a name somewhere.
   if (In.consume_front("V")) {
     std::underlying_type_t<Atom> At;
     if (In.consumeInteger(10, At))
@@ -154,13 +156,13 @@ const Formula *parse(Arena &A, llvm::StringRef &In) {
 }
 
 class FormulaParseError : public llvm::ErrorInfo<FormulaParseError> {
-  unsigned Offset;
   std::string Formula;
+  unsigned Offset;
 
 public:
   static char ID;
   FormulaParseError(llvm::StringRef Formula, unsigned Offset)
-      : Offset(Offset), Formula(Formula) {}
+      : Formula(Formula), Offset(Offset) {}
 
   void log(raw_ostream &OS) const override {
     OS << "bad formula at offset " << Offset << "\n";
diff --git a/clang/unittests/Analysis/FlowSensitive/ArenaTest.cpp b/clang/unittests/Analysis/FlowSensitive/ArenaTest.cpp
index 1fc2ed2c813addc..1208b78a308d1df 100644
--- a/clang/unittests/Analysis/FlowSensitive/ArenaTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/ArenaTest.cpp
@@ -140,7 +140,7 @@ TEST_F(ArenaTest, Interning) {
   EXPECT_EQ(&B1.formula(), &F1);
 }
 
-TEST_F(ArenaTest, Parse) {
+TEST_F(ArenaTest, ParseFormula) {
   Atom V5{5};
   Atom V6{6};
   EXPECT_THAT_EXPECTED(A.parseFormula("V5"), HasValue(Ref(A.makeAtomRef(V5))));



More information about the cfe-commits mailing list