[clang] b50c87d - [clang][dataflow] #llvm #flow-analysis Simplify formula at CNF construction time, and short-cut solving of known contradictory formulas.
Martin Braenne via cfe-commits
cfe-commits at lists.llvm.org
Mon Sep 4 23:23:11 PDT 2023
Author: Burak Emir
Date: 2023-09-05T06:23:04Z
New Revision: b50c87d1e63f187105b5b73f7add37717ccce7f6
URL: https://github.com/llvm/llvm-project/commit/b50c87d1e63f187105b5b73f7add37717ccce7f6
DIFF: https://github.com/llvm/llvm-project/commit/b50c87d1e63f187105b5b73f7add37717ccce7f6.diff
LOG: [clang][dataflow] #llvm #flow-analysis Simplify formula at CNF construction time, and short-cut solving of known contradictory formulas.
In dataflow analysis, SAT solver: simplify formula during CNF construction and short-cut
solving when the formula has been recognized as contradictory.
Reviewed By: sammccall
Differential Revision: https://reviews.llvm.org/D158407
Added:
Modified:
clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp
clang/unittests/Analysis/FlowSensitive/SolverTest.cpp
Removed:
################################################################################
diff --git a/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp b/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp
index 037886d09c4f7dd..05ae36ddbc898ec 100644
--- a/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp
+++ b/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include <cassert>
+#include <cstddef>
#include <cstdint>
-#include <iterator>
#include <queue>
#include <vector>
@@ -23,8 +23,10 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+
namespace clang {
namespace dataflow {
@@ -62,6 +64,10 @@ static constexpr Literal NullLit = 0;
/// Returns the positive literal `V`.
static constexpr Literal posLit(Variable V) { return 2 * V; }
+static constexpr bool isPosLit(Literal L) { return 0 == (L & 1); }
+
+static constexpr bool isNegLit(Literal L) { return 1 == (L & 1); }
+
/// Returns the negative literal `!V`.
static constexpr Literal negLit(Variable V) { return 2 * V + 1; }
@@ -125,9 +131,14 @@ struct CNFFormula {
/// formula.
llvm::DenseMap<Variable, Atom> Atomics;
+ /// Indicates that we already know the formula is unsatisfiable.
+ /// During construction, we catch simple cases of conflicting unit-clauses.
+ bool KnownContradictory;
+
explicit CNFFormula(Variable LargestVar,
llvm::DenseMap<Variable, Atom> Atomics)
- : LargestVar(LargestVar), Atomics(std::move(Atomics)) {
+ : LargestVar(LargestVar), Atomics(std::move(Atomics)),
+ KnownContradictory(false) {
Clauses.push_back(0);
ClauseStarts.push_back(0);
NextWatched.push_back(0);
@@ -135,33 +146,24 @@ struct CNFFormula {
WatchedHead.resize(NumLiterals + 1, 0);
}
- /// Adds the `L1 v L2 v L3` clause to the formula. If `L2` or `L3` are
- /// `NullLit` they are respectively omitted from the clause.
- ///
+ /// Adds the `L1 v ... v Ln` clause to the formula.
/// Requirements:
///
- /// `L1` must not be `NullLit`.
+ /// `Li` must not be `NullLit`.
///
/// All literals in the input that are not `NullLit` must be distinct.
- void addClause(Literal L1, Literal L2 = NullLit, Literal L3 = NullLit) {
- // The literals are guaranteed to be distinct from properties of Formula
- // and the construction in `buildCNF`.
- assert(L1 != NullLit && L1 != L2 && L1 != L3 &&
- (L2 != L3 || L2 == NullLit));
+ void addClause(ArrayRef<Literal> lits) {
+ assert(!lits.empty());
+ assert(llvm::all_of(lits, [](Literal L) { return L != NullLit; }));
const ClauseID C = ClauseStarts.size();
const size_t S = Clauses.size();
ClauseStarts.push_back(S);
-
- Clauses.push_back(L1);
- if (L2 != NullLit)
- Clauses.push_back(L2);
- if (L3 != NullLit)
- Clauses.push_back(L3);
+ Clauses.insert(Clauses.end(), lits.begin(), lits.end());
// Designate the first literal as the "watched" literal of the clause.
- NextWatched.push_back(WatchedHead[L1]);
- WatchedHead[L1] = C;
+ NextWatched.push_back(WatchedHead[lits.front()]);
+ WatchedHead[lits.front()] = C;
}
/// Returns the number of literals in clause `C`.
@@ -176,6 +178,84 @@ struct CNFFormula {
}
};
+/// Applies simplifications while building up a BooleanFormula.
+/// We keep track of unit clauses, which tell us variables that must be
+/// true/false in any model that satisfies the overall formula.
+/// Such variables can be dropped from subsequently-added clauses, which
+/// may in turn yield more unit clauses or even a contradiction.
+/// The total added complexity of this preprocessing is O(N) where we
+/// for every clause, we do a lookup for each unit clauses.
+/// The lookup is O(1) on average. This method won't catch all
+/// contradictory formulas, more passes can in principle catch
+/// more cases but we leave all these and the general case to the
+/// proper SAT solver.
+struct CNFFormulaBuilder {
+ // Formula should outlive CNFFormulaBuilder.
+ explicit CNFFormulaBuilder(CNFFormula &CNF)
+ : Formula(CNF) {}
+
+ /// Adds the `L1 v ... v Ln` clause to the formula. Applies
+ /// simplifications, based on single-literal clauses.
+ ///
+ /// Requirements:
+ ///
+ /// `Li` must not be `NullLit`.
+ ///
+ /// All literals must be distinct.
+ void addClause(ArrayRef<Literal> Literals) {
+ // We generate clauses with up to 3 literals in this file.
+ assert(!Literals.empty() && Literals.size() <= 3);
+ // Contains literals of the simplified clause.
+ llvm::SmallVector<Literal> Simplified;
+ for (auto L : Literals) {
+ assert(L != NullLit &&
+ llvm::all_of(Simplified,
+ [L](Literal S) { return S != L; }));
+ auto X = var(L);
+ if (trueVars.contains(X)) { // X must be true
+ if (isPosLit(L))
+ return; // Omit clause `(... v X v ...)`, it is `true`.
+ else
+ continue; // Omit `!X` from `(... v !X v ...)`.
+ }
+ if (falseVars.contains(X)) { // X must be false
+ if (isNegLit(L))
+ return; // Omit clause `(... v !X v ...)`, it is `true`.
+ else
+ continue; // Omit `X` from `(... v X v ...)`.
+ }
+ Simplified.push_back(L);
+ }
+ if (Simplified.empty()) {
+ // Simplification made the clause empty, which is equivalent to `false`.
+ // We already know that this formula is unsatisfiable.
+ Formula.KnownContradictory = true;
+ // We can add any of the input literals to get an unsatisfiable formula.
+ Formula.addClause(Literals[0]);
+ return;
+ }
+ if (Simplified.size() == 1) {
+ // We have new unit clause.
+ const Literal lit = Simplified.front();
+ const Variable v = var(lit);
+ if (isPosLit(lit))
+ trueVars.insert(v);
+ else
+ falseVars.insert(v);
+ }
+ Formula.addClause(Simplified);
+ }
+
+ /// Returns true if we observed a contradiction while adding clauses.
+ /// In this case then the formula is already known to be unsatisfiable.
+ bool isKnownContradictory() { return Formula.KnownContradictory; }
+
+private:
+ CNFFormula &Formula;
+ llvm::DenseSet<Variable> trueVars;
+ llvm::DenseSet<Variable> falseVars;
+};
+
/// Converts the conjunction of `Vals` into a formula in conjunctive normal
/// form where each clause has at least one and at most three literals.
CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Vals) {
@@ -218,11 +298,12 @@ CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Vals) {
CNFFormula CNF(NextVar - 1, std::move(Atomics));
std::vector<bool> ProcessedSubVals(NextVar, false);
+ CNFFormulaBuilder builder(CNF);
- // Add a conjunct for each variable that represents a top-level formula in
- // `Vals`.
+ // Add a conjunct for each variable that represents a top-level conjunction
+ // value in `Vals`.
for (const Formula *Val : Vals)
- CNF.addClause(posLit(GetVar(Val)));
+ builder.addClause(posLit(GetVar(Val)));
// Add conjuncts that represent the mapping between newly-created variables
// and their corresponding sub-formulas.
@@ -249,15 +330,15 @@ CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Vals) {
// `X <=> (A ^ A)` is equivalent to `(!X v A) ^ (X v !A)` which is
// already in conjunctive normal form. Below we add each of the
// conjuncts of the latter expression to the result.
- CNF.addClause(negLit(Var), posLit(LHS));
- CNF.addClause(posLit(Var), negLit(LHS));
+ builder.addClause({negLit(Var), posLit(LHS)});
+ builder.addClause({posLit(Var), negLit(LHS)});
} else {
- // `X <=> (A ^ B)` is equivalent to `(!X v A) ^ (!X v B) ^ (X v !A v !B)`
- // which is already in conjunctive normal form. Below we add each of the
- // conjuncts of the latter expression to the result.
- CNF.addClause(negLit(Var), posLit(LHS));
- CNF.addClause(negLit(Var), posLit(RHS));
- CNF.addClause(posLit(Var), negLit(LHS), negLit(RHS));
+ // `X <=> (A ^ B)` is equivalent to `(!X v A) ^ (!X v B) ^ (X v !A v
+ // !B)` which is already in conjunctive normal form. Below we add each
+ // of the conjuncts of the latter expression to the result.
+ builder.addClause({negLit(Var), posLit(LHS)});
+ builder.addClause({negLit(Var), posLit(RHS)});
+ builder.addClause({posLit(Var), negLit(LHS), negLit(RHS)});
}
break;
}
@@ -269,15 +350,15 @@ CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Vals) {
// `X <=> (A v A)` is equivalent to `(!X v A) ^ (X v !A)` which is
// already in conjunctive normal form. Below we add each of the
// conjuncts of the latter expression to the result.
- CNF.addClause(negLit(Var), posLit(LHS));
- CNF.addClause(posLit(Var), negLit(LHS));
+ builder.addClause({negLit(Var), posLit(LHS)});
+ builder.addClause({posLit(Var), negLit(LHS)});
} else {
// `X <=> (A v B)` is equivalent to `(!X v A v B) ^ (X v !A) ^ (X v
// !B)` which is already in conjunctive normal form. Below we add each
// of the conjuncts of the latter expression to the result.
- CNF.addClause(negLit(Var), posLit(LHS), posLit(RHS));
- CNF.addClause(posLit(Var), negLit(LHS));
- CNF.addClause(posLit(Var), negLit(RHS));
+ builder.addClause({negLit(Var), posLit(LHS), posLit(RHS)});
+ builder.addClause({posLit(Var), negLit(LHS)});
+ builder.addClause({posLit(Var), negLit(RHS)});
}
break;
}
@@ -287,8 +368,8 @@ CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Vals) {
// `X <=> !Y` is equivalent to `(!X v !Y) ^ (X v Y)` which is
// already in conjunctive normal form. Below we add each of the
// conjuncts of the latter expression to the result.
- CNF.addClause(negLit(Var), negLit(Operand));
- CNF.addClause(posLit(Var), posLit(Operand));
+ builder.addClause({negLit(Var), negLit(Operand)});
+ builder.addClause({posLit(Var), posLit(Operand)});
break;
}
case Formula::Implies: {
@@ -299,9 +380,9 @@ CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Vals) {
// `(X v A) ^ (X v !B) ^ (!X v !A v B)` which is already in
// conjunctive normal form. Below we add each of the conjuncts of
// the latter expression to the result.
- CNF.addClause(posLit(Var), posLit(LHS));
- CNF.addClause(posLit(Var), negLit(RHS));
- CNF.addClause(negLit(Var), negLit(LHS), posLit(RHS));
+ builder.addClause({posLit(Var), posLit(LHS)});
+ builder.addClause({posLit(Var), negLit(RHS)});
+ builder.addClause({negLit(Var), negLit(LHS), posLit(RHS)});
break;
}
case Formula::Equal: {
@@ -309,10 +390,10 @@ CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Vals) {
const Variable RHS = GetVar(Val->operands()[1]);
if (LHS == RHS) {
- // `X <=> (A <=> A)` is equvalent to `X` which is already in
+ // `X <=> (A <=> A)` is equivalent to `X` which is already in
// conjunctive normal form. Below we add each of the conjuncts of the
// latter expression to the result.
- CNF.addClause(posLit(Var));
+ builder.addClause(posLit(Var));
// No need to visit the sub-values of `Val`.
continue;
@@ -321,18 +402,43 @@ CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Vals) {
// `(X v A v B) ^ (X v !A v !B) ^ (!X v A v !B) ^ (!X v !A v B)` which
// is already in conjunctive normal form. Below we add each of the
// conjuncts of the latter expression to the result.
- CNF.addClause(posLit(Var), posLit(LHS), posLit(RHS));
- CNF.addClause(posLit(Var), negLit(LHS), negLit(RHS));
- CNF.addClause(negLit(Var), posLit(LHS), negLit(RHS));
- CNF.addClause(negLit(Var), negLit(LHS), posLit(RHS));
+ builder.addClause({posLit(Var), posLit(LHS), posLit(RHS)});
+ builder.addClause({posLit(Var), negLit(LHS), negLit(RHS)});
+ builder.addClause({negLit(Var), posLit(LHS), negLit(RHS)});
+ builder.addClause({negLit(Var), negLit(LHS), posLit(RHS)});
break;
}
}
+ if (builder.isKnownContradictory()) {
+ return CNF;
+ }
for (const Formula *Child : Val->operands())
UnprocessedSubVals.push(Child);
}
- return CNF;
+ // Unit clauses that were added later were not
+ // considered for the simplification of earlier clauses. Do a final
+ // pass to find more opportunities for simplification.
+ CNFFormula FinalCNF(NextVar - 1, std::move(CNF.Atomics));
+ CNFFormulaBuilder FinalBuilder(FinalCNF);
+
+ // Collect unit clauses.
+ for (ClauseID C = 1; C < CNF.ClauseStarts.size(); ++C) {
+ if (CNF.clauseSize(C) == 1) {
+ FinalBuilder.addClause(CNF.clauseLiterals(C)[0]);
+ }
+ }
+
+ // Add all clauses that were added previously, preserving the order.
+ for (ClauseID C = 1; C < CNF.ClauseStarts.size(); ++C) {
+ FinalBuilder.addClause(CNF.clauseLiterals(C));
+ if (FinalBuilder.isKnownContradictory()) {
+ break;
+ }
+ }
+ // It is possible there were new unit clauses again, but
+ // we stop here and leave the rest to the solver algorithm.
+ return FinalCNF;
}
class WatchedLiteralsSolverImpl {
@@ -414,6 +520,11 @@ class WatchedLiteralsSolverImpl {
// Returns the `Result` and the number of iterations "remaining" from
// `MaxIterations` (that is, `MaxIterations` - iterations in this call).
std::pair<Solver::Result, std::int64_t> solve(std::int64_t MaxIterations) && {
+ if (CNF.KnownContradictory) {
+ // Short-cut the solving process. We already found out at CNF
+ // construction time that the formula is unsatisfiable.
+ return std::make_pair(Solver::Result::Unsatisfiable(), MaxIterations);
+ }
size_t I = 0;
while (I < ActiveVars.size()) {
if (MaxIterations == 0)
@@ -503,7 +614,8 @@ class WatchedLiteralsSolverImpl {
++I;
}
}
- return std::make_pair(Solver::Result::Satisfiable(buildSolution()), MaxIterations);
+ return std::make_pair(Solver::Result::Satisfiable(buildSolution()),
+ MaxIterations);
}
private:
@@ -672,8 +784,7 @@ Solver::Result
WatchedLiteralsSolver::solve(llvm::ArrayRef<const Formula *> Vals) {
if (Vals.empty())
return Solver::Result::Satisfiable({{}});
- auto [Res, Iterations] =
- WatchedLiteralsSolverImpl(Vals).solve(MaxIterations);
+ auto [Res, Iterations] = WatchedLiteralsSolverImpl(Vals).solve(MaxIterations);
MaxIterations = Iterations;
return Res;
}
diff --git a/clang/unittests/Analysis/FlowSensitive/SolverTest.cpp b/clang/unittests/Analysis/FlowSensitive/SolverTest.cpp
index 28f4adb2d39354c..e168f0ef1378e45 100644
--- a/clang/unittests/Analysis/FlowSensitive/SolverTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/SolverTest.cpp
@@ -369,4 +369,32 @@ TEST(SolverTest, ReachedLimitsReflectsTimeouts) {
EXPECT_TRUE(solver.reachedLimit());
}
+TEST(SolverTest, SimpleButLargeContradiction) {
+ // This test ensures that the solver takes a short-cut on known
+ // contradictory inputs, without using max_iterations. At the time
+ // this test is added, formulas that are easily recognized to be
+ // contradictory at CNF construction time would lead to timeout.
+ WatchedLiteralsSolver solver(10);
+ ConstraintContext Ctx;
+ auto first = Ctx.atom();
+ auto last = first;
+ for (int i = 1; i < 10000; ++i) {
+ last = Ctx.conj(last, Ctx.atom());
+ }
+ last = Ctx.conj(Ctx.neg(first), last);
+ ASSERT_EQ(solver.solve({last}).getStatus(),
+ Solver::Result::Status::Unsatisfiable);
+ EXPECT_FALSE(solver.reachedLimit());
+
+ first = Ctx.atom();
+ last = Ctx.neg(first);
+ for (int i = 1; i < 10000; ++i) {
+ last = Ctx.conj(last, Ctx.neg(Ctx.atom()));
+ }
+ last = Ctx.conj(first, last);
+ ASSERT_EQ(solver.solve({last}).getStatus(),
+ Solver::Result::Status::Unsatisfiable);
+ EXPECT_FALSE(solver.reachedLimit());
+}
+
} // namespace
More information about the cfe-commits
mailing list