[clang] 955a05a - [clang][dataflow] Optimize flow condition representation

Stanislav Gatev via cfe-commits cfe-commits at lists.llvm.org
Sun May 1 09:26:56 PDT 2022


Author: Stanislav Gatev
Date: 2022-05-01T16:25:29Z
New Revision: 955a05a2782ee19d6f4bce6bf5d1c4a8f591f0f3

URL: https://github.com/llvm/llvm-project/commit/955a05a2782ee19d6f4bce6bf5d1c4a8f591f0f3
DIFF: https://github.com/llvm/llvm-project/commit/955a05a2782ee19d6f4bce6bf5d1c4a8f591f0f3.diff

LOG: [clang][dataflow] Optimize flow condition representation

Enable efficient implementation of context-aware joining of distinct
boolean values. It can be used to join distinct boolean values while
preserving flow condition information.

Flow conditions are represented as Token <=> Clause iff formulas. To
perform context-aware joining, one can simply add the tokens of flow
conditions to the formula when joining distinct boolean values, e.g:
`makeOr(makeAnd(FC1, Val1), makeAnd(FC2, Val2))`. This significantly
simplifies the implementation of `Environment::join`.

This patch removes the `DataflowAnalysisContext::getSolver` method.
The `DataflowAnalysisContext::flowConditionImplies` method should be
used instead.

Reviewed-by: ymandel, xazax.hun

Differential Revision: https://reviews.llvm.org/D124395

Added: 
    

Modified: 
    clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
    clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
    clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
    clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
    clang/unittests/Analysis/FlowSensitive/DataflowAnalysisContextTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
index 8df88301dff83..5cf681e4e489c 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
@@ -21,6 +21,7 @@
 #include "clang/Analysis/FlowSensitive/StorageLocation.h"
 #include "clang/Analysis/FlowSensitive/Value.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include <cassert>
 #include <memory>
 #include <type_traits>
@@ -45,9 +46,6 @@ class DataflowAnalysisContext {
     assert(this->S != nullptr);
   }
 
-  /// Returns the SAT solver instance that is available in this context.
-  Solver &getSolver() const { return *S; }
-
   /// Takes ownership of `Loc` and returns a reference to it.
   ///
   /// Requirements:
@@ -151,7 +149,39 @@ class DataflowAnalysisContext {
   /// calls with the same argument will return the same result.
   BoolValue &getOrCreateNegationValue(BoolValue &Val);
 
+  /// Creates a fresh flow condition and returns a token that identifies it. The
+  /// token can be used to perform various operations on the flow condition such
+  /// as adding constraints to it, forking it, joining it with another flow
+  /// condition, or checking implications.
+  AtomicBoolValue &makeFlowConditionToken();
+
+  /// Adds `Constraint` to the flow condition identified by `Token`.
+  void addFlowConditionConstraint(AtomicBoolValue &Token,
+                                  BoolValue &Constraint);
+
+  /// Creates a new flow condition with the same constraints as the flow
+  /// condition identified by `Token` and returns its token.
+  AtomicBoolValue &forkFlowCondition(AtomicBoolValue &Token);
+
+  /// Creates a new flow condition that represents the disjunction of the flow
+  /// conditions identified by `FirstToken` and `SecondToken`, and returns its
+  /// token.
+  AtomicBoolValue &joinFlowConditions(AtomicBoolValue &FirstToken,
+                                      AtomicBoolValue &SecondToken);
+
+  /// Returns true if and only if the constraints of the flow condition
+  /// identified by `Token` imply that `Val` is true.
+  bool flowConditionImplies(AtomicBoolValue &Token, BoolValue &Val);
+
 private:
+  /// Adds all constraints of the flow condition identified by `Token` and all
+  /// of its transitive dependencies to `Constraints`. `VisitedTokens` is used
+  /// to track tokens of flow conditions that were already visited by recursive
+  /// calls.
+  void addTransitiveFlowConditionConstraints(
+      AtomicBoolValue &Token, llvm::DenseSet<BoolValue *> &Constraints,
+      llvm::DenseSet<AtomicBoolValue *> &VisitedTokens) const;
+
   std::unique_ptr<Solver> S;
 
   // Storage for the state of a program.
@@ -178,6 +208,27 @@ class DataflowAnalysisContext {
   llvm::DenseMap<std::pair<BoolValue *, BoolValue *>, DisjunctionValue *>
       DisjunctionVals;
   llvm::DenseMap<BoolValue *, NegationValue *> NegationVals;
+
+  // Flow conditions are tracked symbolically: each unique flow condition is
+  // associated with a fresh symbolic variable (token), bound to the clause that
+  // defines the flow condition. Conceptually, each binding corresponds to an
+  // "iff" of the form `FC <=> (C1 ^ C2 ^ ...)` where `FC` is a flow condition
+  // token (an atomic boolean) and `Ci`s are the set of constraints in the flow
+  // flow condition clause. Internally, we do not record the formula directly as
+  // an "iff". Instead, a flow condition clause is encoded as conjuncts of the
+  // form `(FC v !C1 v !C2 v ...) ^ (C1 v !FC) ^ (C2 v !FC) ^ ...`. The first
+  // conjuct is stored in the `FlowConditionFirstConjuncts` map and the set of
+  // remaining conjuncts are stored in the `FlowConditionRemainingConjuncts`
+  // map, both keyed by the token of the flow condition.
+  //
+  // Flow conditions depend on other flow conditions if they are created using
+  // `forkFlowCondition` or `joinFlowConditions`. The graph of flow condition
+  // dependencies is stored in the `FlowConditionDeps` map.
+  llvm::DenseMap<AtomicBoolValue *, llvm::DenseSet<AtomicBoolValue *>>
+      FlowConditionDeps;
+  llvm::DenseMap<AtomicBoolValue *, BoolValue *> FlowConditionFirstConjuncts;
+  llvm::DenseMap<AtomicBoolValue *, llvm::DenseSet<BoolValue *>>
+      FlowConditionRemainingConjuncts;
 };
 
 } // namespace dataflow

diff  --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
index bf4b7e5cc5bd9..0a2c75f804c2a 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -111,7 +111,13 @@ class Environment {
 
   /// Creates an environment that uses `DACtx` to store objects that encompass
   /// the state of a program.
-  explicit Environment(DataflowAnalysisContext &DACtx) : DACtx(&DACtx) {}
+  explicit Environment(DataflowAnalysisContext &DACtx);
+
+  Environment(const Environment &Other);
+  Environment &operator=(const Environment &Other);
+
+  Environment(Environment &&Other) = default;
+  Environment &operator=(Environment &&Other) = default;
 
   /// Creates an environment that uses `DACtx` to store objects that encompass
   /// the state of a program.
@@ -297,9 +303,8 @@ class Environment {
                : makeAnd(makeImplication(LHS, RHS), makeImplication(RHS, LHS));
   }
 
-  const llvm::DenseSet<BoolValue *> &getFlowConditionConstraints() const {
-    return FlowConditionConstraints;
-  }
+  /// Returns the token that identifies the flow condition of the environment.
+  AtomicBoolValue &getFlowConditionToken() const { return *FlowConditionToken; }
 
   /// Adds `Val` to the set of clauses that constitute the flow condition.
   void addToFlowCondition(BoolValue &Val);
@@ -345,7 +350,7 @@ class Environment {
                  std::pair<StructValue *, const ValueDecl *>>
       MemberLocToStruct;
 
-  llvm::DenseSet<BoolValue *> FlowConditionConstraints;
+  AtomicBoolValue *FlowConditionToken;
 };
 
 } // namespace dataflow

diff  --git a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
index afc38f2849af6..4274fca4e3c14 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
@@ -64,5 +64,81 @@ BoolValue &DataflowAnalysisContext::getOrCreateNegationValue(BoolValue &Val) {
   return *Res.first->second;
 }
 
+AtomicBoolValue &DataflowAnalysisContext::makeFlowConditionToken() {
+  AtomicBoolValue &Token = createAtomicBoolValue();
+  FlowConditionRemainingConjuncts[&Token] = {};
+  FlowConditionFirstConjuncts[&Token] = &Token;
+  return Token;
+}
+
+void DataflowAnalysisContext::addFlowConditionConstraint(
+    AtomicBoolValue &Token, BoolValue &Constraint) {
+  FlowConditionRemainingConjuncts[&Token].insert(&getOrCreateDisjunctionValue(
+      Constraint, getOrCreateNegationValue(Token)));
+  FlowConditionFirstConjuncts[&Token] =
+      &getOrCreateDisjunctionValue(*FlowConditionFirstConjuncts[&Token],
+                                   getOrCreateNegationValue(Constraint));
+}
+
+AtomicBoolValue &
+DataflowAnalysisContext::forkFlowCondition(AtomicBoolValue &Token) {
+  auto &ForkToken = makeFlowConditionToken();
+  FlowConditionDeps[&ForkToken].insert(&Token);
+  addFlowConditionConstraint(ForkToken, Token);
+  return ForkToken;
+}
+
+AtomicBoolValue &
+DataflowAnalysisContext::joinFlowConditions(AtomicBoolValue &FirstToken,
+                                            AtomicBoolValue &SecondToken) {
+  auto &Token = makeFlowConditionToken();
+  FlowConditionDeps[&Token].insert(&FirstToken);
+  FlowConditionDeps[&Token].insert(&SecondToken);
+  addFlowConditionConstraint(
+      Token, getOrCreateDisjunctionValue(FirstToken, SecondToken));
+  return Token;
+}
+
+bool DataflowAnalysisContext::flowConditionImplies(AtomicBoolValue &Token,
+                                                   BoolValue &Val) {
+  // Returns true if and only if truth assignment of the flow condition implies
+  // that `Val` is also true. We prove whether or not this property holds by
+  // reducing the problem to satisfiability checking. In other words, we attempt
+  // to show that assuming `Val` is false makes the constraints induced by the
+  // flow condition unsatisfiable.
+  llvm::DenseSet<BoolValue *> Constraints = {
+      &Token,
+      &getBoolLiteralValue(true),
+      &getOrCreateNegationValue(getBoolLiteralValue(false)),
+      &getOrCreateNegationValue(Val),
+  };
+  llvm::DenseSet<AtomicBoolValue *> VisitedTokens;
+  addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens);
+  return S->solve(std::move(Constraints)) == Solver::Result::Unsatisfiable;
+}
+
+void DataflowAnalysisContext::addTransitiveFlowConditionConstraints(
+    AtomicBoolValue &Token, llvm::DenseSet<BoolValue *> &Constraints,
+    llvm::DenseSet<AtomicBoolValue *> &VisitedTokens) const {
+  auto Res = VisitedTokens.insert(&Token);
+  if (!Res.second)
+    return;
+
+  auto FirstConjunctIT = FlowConditionFirstConjuncts.find(&Token);
+  if (FirstConjunctIT != FlowConditionFirstConjuncts.end())
+    Constraints.insert(FirstConjunctIT->second);
+  auto RemainingConjunctsIT = FlowConditionRemainingConjuncts.find(&Token);
+  if (RemainingConjunctsIT != FlowConditionRemainingConjuncts.end())
+    Constraints.insert(RemainingConjunctsIT->second.begin(),
+                       RemainingConjunctsIT->second.end());
+
+  auto DepsIT = FlowConditionDeps.find(&Token);
+  if (DepsIT != FlowConditionDeps.end()) {
+    for (AtomicBoolValue *DepToken : DepsIT->second)
+      addTransitiveFlowConditionConstraints(*DepToken, Constraints,
+                                            VisitedTokens);
+  }
+}
+
 } // namespace dataflow
 } // namespace clang

diff  --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index 68c55b665cc61..469696643d319 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -77,33 +77,14 @@ static Value *mergeDistinctValues(QualType Type, Value *Val1,
                                   Environment &MergedEnv,
                                   Environment::ValueModel &Model) {
   // Join distinct boolean values preserving information about the constraints
-  // in the respective path conditions. Note: this construction can, in
-  // principle, result in exponential growth in the size of boolean values.
-  // Potential optimizations may be worth considering. For example, represent
-  // the flow condition of each environment using a bool atom and store, in
-  // `DataflowAnalysisContext`, a mapping of bi-conditionals between flow
-  // condition atoms and flow condition constraints. Something like:
-  // \code
-  //   FC1 <=> C1 ^ C2
-  //   FC2 <=> C2 ^ C3 ^ C4
-  //   FC3 <=> (FC1 v FC2) ^ C5
-  // \code
-  // Then, we can track dependencies between flow conditions (e.g. above `FC3`
-  // depends on `FC1` and `FC2`) and modify `flowConditionImplies` to construct
-  // a formula that includes the bi-conditionals for all flow condition atoms in
-  // the transitive set, before invoking the solver.
+  // in the respective path conditions.
   //
   // FIXME: Does not work for backedges, since the two (or more) paths will not
   // have mutually exclusive conditions.
   if (auto *Expr1 = dyn_cast<BoolValue>(Val1)) {
-    for (BoolValue *Constraint : Env1.getFlowConditionConstraints()) {
-      Expr1 = &MergedEnv.makeAnd(*Expr1, *Constraint);
-    }
     auto *Expr2 = cast<BoolValue>(Val2);
-    for (BoolValue *Constraint : Env2.getFlowConditionConstraints()) {
-      Expr2 = &MergedEnv.makeAnd(*Expr2, *Constraint);
-    }
-    return &MergedEnv.makeOr(*Expr1, *Expr2);
+    return &Env1.makeOr(Env1.makeAnd(Env1.getFlowConditionToken(), *Expr1),
+                        Env1.makeAnd(Env2.getFlowConditionToken(), *Expr2));
   }
 
   // FIXME: add unit tests that cover this statement.
@@ -166,63 +147,6 @@ static void initGlobalVars(const Stmt &S, Environment &Env) {
   }
 }
 
-/// Returns constraints that represent the disjunction of `Constraints1` and
-/// `Constraints2`.
-///
-/// Requirements:
-///
-///  The elements of `Constraints1` and `Constraints2` must not be null.
-llvm::DenseSet<BoolValue *>
-joinConstraints(DataflowAnalysisContext *Context,
-                const llvm::DenseSet<BoolValue *> &Constraints1,
-                const llvm::DenseSet<BoolValue *> &Constraints2) {
-  // `(X ^ Y) v (X ^ Z)` is logically equivalent to `X ^ (Y v Z)`. Therefore, to
-  // avoid unnecessarily expanding the resulting set of constraints, we will add
-  // all common constraints of `Constraints1` and `Constraints2` directly and
-  // add a disjunction of the constraints that are not common.
-
-  llvm::DenseSet<BoolValue *> JoinedConstraints;
-
-  if (Constraints1.empty() || Constraints2.empty()) {
-    // Disjunction of empty set and non-empty set is represented as empty set.
-    return JoinedConstraints;
-  }
-
-  BoolValue *Val1 = nullptr;
-  for (BoolValue *Constraint : Constraints1) {
-    if (Constraints2.contains(Constraint)) {
-      // Add common constraints directly to `JoinedConstraints`.
-      JoinedConstraints.insert(Constraint);
-    } else if (Val1 == nullptr) {
-      Val1 = Constraint;
-    } else {
-      Val1 = &Context->getOrCreateConjunctionValue(*Val1, *Constraint);
-    }
-  }
-
-  BoolValue *Val2 = nullptr;
-  for (BoolValue *Constraint : Constraints2) {
-    // Common constraints are added to `JoinedConstraints` above.
-    if (Constraints1.contains(Constraint)) {
-      continue;
-    }
-    if (Val2 == nullptr) {
-      Val2 = Constraint;
-    } else {
-      Val2 = &Context->getOrCreateConjunctionValue(*Val2, *Constraint);
-    }
-  }
-
-  // An empty set of constraints (represented as a null value) is interpreted as
-  // `true` and `true v X` is logically equivalent to `true` so we need to add a
-  // constraint only if both `Val1` and `Val2` are not null.
-  if (Val1 != nullptr && Val2 != nullptr)
-    JoinedConstraints.insert(
-        &Context->getOrCreateDisjunctionValue(*Val1, *Val2));
-
-  return JoinedConstraints;
-}
-
 static void
 getFieldsFromClassHierarchy(QualType Type, bool IgnorePrivateFields,
                             llvm::DenseSet<const FieldDecl *> &Fields) {
@@ -259,6 +183,22 @@ getAccessibleObjectFields(QualType Type) {
   return Fields;
 }
 
+Environment::Environment(DataflowAnalysisContext &DACtx)
+    : DACtx(&DACtx), FlowConditionToken(&DACtx.makeFlowConditionToken()) {}
+
+Environment::Environment(const Environment &Other)
+    : DACtx(Other.DACtx), DeclToLoc(Other.DeclToLoc),
+      ExprToLoc(Other.ExprToLoc), LocToVal(Other.LocToVal),
+      MemberLocToStruct(Other.MemberLocToStruct),
+      FlowConditionToken(&DACtx->forkFlowCondition(*Other.FlowConditionToken)) {
+}
+
+Environment &Environment::operator=(const Environment &Other) {
+  Environment Copy(Other);
+  *this = std::move(Copy);
+  return *this;
+}
+
 Environment::Environment(DataflowAnalysisContext &DACtx,
                          const DeclContext &DeclCtx)
     : Environment(DACtx) {
@@ -343,8 +283,8 @@ LatticeJoinEffect Environment::join(const Environment &Other,
     Effect = LatticeJoinEffect::Changed;
 
   // FIXME: set `Effect` as needed.
-  JoinedEnv.FlowConditionConstraints = joinConstraints(
-      DACtx, FlowConditionConstraints, Other.FlowConditionConstraints);
+  JoinedEnv.FlowConditionToken = &DACtx->joinFlowConditions(
+      *FlowConditionToken, *Other.FlowConditionToken);
 
   for (auto &Entry : LocToVal) {
     const StorageLocation *Loc = Entry.first;
@@ -610,22 +550,11 @@ const StorageLocation &Environment::skip(const StorageLocation &Loc,
 }
 
 void Environment::addToFlowCondition(BoolValue &Val) {
-  FlowConditionConstraints.insert(&Val);
+  DACtx->addFlowConditionConstraint(*FlowConditionToken, Val);
 }
 
 bool Environment::flowConditionImplies(BoolValue &Val) const {
-  // Returns true if and only if truth assignment of the flow condition implies
-  // that `Val` is also true. We prove whether or not this property holds by
-  // reducing the problem to satisfiability checking. In other words, we attempt
-  // to show that assuming `Val` is false makes the constraints induced by the
-  // flow condition unsatisfiable.
-  llvm::DenseSet<BoolValue *> Constraints = {
-      &makeNot(Val), &getBoolLiteralValue(true),
-      &makeNot(getBoolLiteralValue(false))};
-  Constraints.insert(FlowConditionConstraints.begin(),
-                     FlowConditionConstraints.end());
-  return DACtx->getSolver().solve(std::move(Constraints)) ==
-         Solver::Result::Unsatisfiable;
+  return DACtx->flowConditionImplies(*FlowConditionToken, Val);
 }
 
 } // namespace dataflow

diff  --git a/clang/unittests/Analysis/FlowSensitive/DataflowAnalysisContextTest.cpp b/clang/unittests/Analysis/FlowSensitive/DataflowAnalysisContextTest.cpp
index 3280476779686..b4b9b169f440e 100644
--- a/clang/unittests/Analysis/FlowSensitive/DataflowAnalysisContextTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/DataflowAnalysisContextTest.cpp
@@ -90,4 +90,54 @@ TEST_F(DataflowAnalysisContextTest,
   EXPECT_NE(&NotX1, &NotY);
 }
 
+TEST_F(DataflowAnalysisContextTest, EmptyFlowCondition) {
+  auto &FC = Context.makeFlowConditionToken();
+  auto &C = Context.createAtomicBoolValue();
+  EXPECT_FALSE(Context.flowConditionImplies(FC, C));
+}
+
+TEST_F(DataflowAnalysisContextTest, AddFlowConditionConstraint) {
+  auto &FC = Context.makeFlowConditionToken();
+  auto &C = Context.createAtomicBoolValue();
+  Context.addFlowConditionConstraint(FC, C);
+  EXPECT_TRUE(Context.flowConditionImplies(FC, C));
+}
+
+TEST_F(DataflowAnalysisContextTest, ForkFlowCondition) {
+  auto &FC1 = Context.makeFlowConditionToken();
+  auto &C1 = Context.createAtomicBoolValue();
+  Context.addFlowConditionConstraint(FC1, C1);
+
+  // Forked flow condition inherits the constraints of its parent flow
+  // condition.
+  auto &FC2 = Context.forkFlowCondition(FC1);
+  EXPECT_TRUE(Context.flowConditionImplies(FC2, C1));
+
+  // Adding a new constraint to the forked flow condition does not affect its
+  // parent flow condition.
+  auto &C2 = Context.createAtomicBoolValue();
+  Context.addFlowConditionConstraint(FC2, C2);
+  EXPECT_TRUE(Context.flowConditionImplies(FC2, C2));
+  EXPECT_FALSE(Context.flowConditionImplies(FC1, C2));
+}
+
+TEST_F(DataflowAnalysisContextTest, JoinFlowConditions) {
+  auto &C1 = Context.createAtomicBoolValue();
+  auto &C2 = Context.createAtomicBoolValue();
+  auto &C3 = Context.createAtomicBoolValue();
+
+  auto &FC1 = Context.makeFlowConditionToken();
+  Context.addFlowConditionConstraint(FC1, C1);
+  Context.addFlowConditionConstraint(FC1, C3);
+
+  auto &FC2 = Context.makeFlowConditionToken();
+  Context.addFlowConditionConstraint(FC2, C2);
+  Context.addFlowConditionConstraint(FC2, C3);
+
+  auto &FC3 = Context.joinFlowConditions(FC1, FC2);
+  EXPECT_FALSE(Context.flowConditionImplies(FC3, C1));
+  EXPECT_FALSE(Context.flowConditionImplies(FC3, C2));
+  EXPECT_TRUE(Context.flowConditionImplies(FC3, C3));
+}
+
 } // namespace


        


More information about the cfe-commits mailing list