[clang] [dataflow] Add global condition to DataflowAnalysisContext (PR #65949)

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Thu Sep 14 06:00:58 PDT 2023


https://github.com/sam-mccall updated https://github.com/llvm/llvm-project/pull/65949:

>From 1d765a3b22cd81d6739a20f2b2f7b68935773f2a Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall at gmail.com>
Date: Mon, 11 Sep 2023 13:21:11 +0200
Subject: [PATCH 1/4] [dataflow] Add global condition to
 DataflowAnalysisContext

This records facts that are not sensitive to the current flow condition,
and should apply to all environments.

The motivating case is recording information about where a Value
originated, such as nullability:
 - we may see the same Value for multiple expressions (e.g. reads of the
   same field) in multiple environments (multiple blocks or iterations)
 - we want to record information only when we first see the Value
   (e.g. Nullability annotations on fields only add information if we
   don't know where the value came from)
 - this information should be expressible as a SAT condition
 - we must add this SAT condition to every environment where the
   Value may appear

We solve this by recording the information in the global condition.
This doesn't seem particularly elegant, but solves the problem and is
a fairly small and natural extension of the Environment.

Alternatives considered:
 - store the constraint directly as a property on the Value.
   But it's more composable for such properties to always be variables
   (AtomicBoolValue), and constrain them with SAT conditions.
 - add a hook whenever values are created, giving the analysis the
   chance to populate them.
   However the framework relies on/provides the ability to construct
   values in arbitrary places without providing the context such a hook
   would need, this would be a very invasive change.
---
 .../FlowSensitive/DataflowAnalysisContext.h   | 20 +++++--
 .../FlowSensitive/DataflowAnalysisContext.cpp | 60 +++++++++++--------
 .../DataflowAnalysisContextTest.cpp           | 16 +++++
 3 files changed, 66 insertions(+), 30 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
index 685bbe486b54e1f..8ff7e584887241a 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
@@ -108,6 +108,14 @@ class DataflowAnalysisContext {
   /// A null `PointeeType` can be used for the pointee of `std::nullptr_t`.
   PointerValue &getOrCreateNullPointerValue(QualType PointeeType);
 
+  /// Adds `Constraint` to current and future flow conditions in this context.
+  ///
+  /// This must be used with care: the constraint must be truly global across
+  /// the analysis, and should not invalidate the result of past logic.
+  ///
+  /// Adding constraints on freshly created boolean variables is generally safe.
+  void addGlobalConstraint(const Formula &Constraint);
+
   /// Adds `Constraint` to the flow condition identified by `Token`.
   void addFlowConditionConstraint(Atom Token, const Formula &Constraint);
 
@@ -174,12 +182,11 @@ class DataflowAnalysisContext {
   void addModeledFields(const FieldSet &Fields);
 
   /// Adds all constraints of the flow condition identified by `Token` and all
-  /// of its transitive dependencies to `Constraints`. `VisitedTokens` is used
-  /// to track tokens of flow conditions that were already visited by recursive
-  /// calls.
-  void addTransitiveFlowConditionConstraints(
-      Atom Token, llvm::SetVector<const Formula *> &Constraints,
-      llvm::DenseSet<Atom> &VisitedTokens);
+  /// of its transitive dependencies to `Constraints`.
+  void
+  addTransitiveFlowConditionConstraints(Atom Token,
+                                        llvm::SetVector<const Formula *> &Out);
+
 
   /// Returns true if the solver is able to prove that there is no satisfying
   /// assignment for `Constraints`
@@ -224,6 +231,7 @@ class DataflowAnalysisContext {
   // dependencies is stored in the `FlowConditionDeps` map.
   llvm::DenseMap<Atom, llvm::DenseSet<Atom>> FlowConditionDeps;
   llvm::DenseMap<Atom, const Formula *> FlowConditionConstraints;
+  const Formula *GlobalConstraints = nullptr;
 
   llvm::DenseMap<const FunctionDecl *, ControlFlowContext> FunctionContexts;
 
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
index 47a994f4bbdb6a1..f2b3207609801e4 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
@@ -104,6 +104,13 @@ DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) {
   return *Res.first->second;
 }
 
+void DataflowAnalysisContext::addGlobalConstraint(const Formula &Constraint) {
+  if (GlobalConstraints == nullptr)
+    GlobalConstraints = &Constraint;
+  else
+    GlobalConstraints = &arena().makeAnd(*GlobalConstraints, Constraint);
+}
+
 void DataflowAnalysisContext::addFlowConditionConstraint(
     Atom Token, const Formula &Constraint) {
   auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint);
@@ -150,7 +157,7 @@ bool DataflowAnalysisContext::flowConditionImplies(Atom Token,
   Constraints.insert(&arena().makeAtomRef(Token));
   Constraints.insert(&arena().makeNot(Val));
   llvm::DenseSet<Atom> VisitedTokens;
-  addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens);
+  addTransitiveFlowConditionConstraints(Token, Constraints);
   return isUnsatisfiable(std::move(Constraints));
 }
 
@@ -160,7 +167,7 @@ bool DataflowAnalysisContext::flowConditionIsTautology(Atom Token) {
   llvm::SetVector<const Formula *> Constraints;
   Constraints.insert(&arena().makeNot(arena().makeAtomRef(Token)));
   llvm::DenseSet<Atom> VisitedTokens;
-  addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens);
+  addTransitiveFlowConditionConstraints(Token, Constraints);
   return isUnsatisfiable(std::move(Constraints));
 }
 
@@ -172,27 +179,33 @@ bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1,
 }
 
 void DataflowAnalysisContext::addTransitiveFlowConditionConstraints(
-    Atom Token, llvm::SetVector<const Formula *> &Constraints,
-    llvm::DenseSet<Atom> &VisitedTokens) {
-  auto Res = VisitedTokens.insert(Token);
-  if (!Res.second)
-    return;
-
-  auto ConstraintsIt = FlowConditionConstraints.find(Token);
-  if (ConstraintsIt == FlowConditionConstraints.end()) {
-    Constraints.insert(&arena().makeAtomRef(Token));
-  } else {
-    // Bind flow condition token via `iff` to its set of constraints:
-    // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints
-    Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token),
-                                           *ConstraintsIt->second));
-  }
+    Atom Token, llvm::SetVector<const Formula *> &Constraints) {
+  llvm::DenseSet<Atom> AddedTokens;
+  std::vector<Atom> Remaining = {Token};
+
+  if (GlobalConstraints)
+    Constraints.insert(GlobalConstraints);
+  // Define all the flow conditions that might be referenced in constraints.
+  while (!Remaining.empty()) {
+    auto Token = Remaining.back();
+    Remaining.pop_back();
+    if (!AddedTokens.insert(Token).second)
+      continue;
+
+    auto ConstraintsIt = FlowConditionConstraints.find(Token);
+    if (ConstraintsIt == FlowConditionConstraints.end()) {
+      Constraints.insert(&arena().makeAtomRef(Token));
+    } else {
+      // Bind flow condition token via `iff` to its set of constraints:
+      // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints
+      Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token),
+                                             *ConstraintsIt->second));
+    }
 
-  auto DepsIt = FlowConditionDeps.find(Token);
-  if (DepsIt != FlowConditionDeps.end()) {
-    for (Atom DepToken : DepsIt->second) {
-      addTransitiveFlowConditionConstraints(DepToken, Constraints,
-                                            VisitedTokens);
+    if (auto DepsIt = FlowConditionDeps.find(Token);
+        DepsIt != FlowConditionDeps.end()) {
+      for (Atom A : DepsIt->second)
+        Remaining.push_back(A);
     }
   }
 }
@@ -201,8 +214,7 @@ void DataflowAnalysisContext::dumpFlowCondition(Atom Token,
                                                 llvm::raw_ostream &OS) {
   llvm::SetVector<const Formula *> Constraints;
   Constraints.insert(&arena().makeAtomRef(Token));
-  llvm::DenseSet<Atom> VisitedTokens;
-  addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens);
+  addTransitiveFlowConditionConstraints(Token, Constraints);
 
   // TODO: have formulas know about true/false directly instead
   Atom True = arena().makeLiteral(true).getAtom();
diff --git a/clang/unittests/Analysis/FlowSensitive/DataflowAnalysisContextTest.cpp b/clang/unittests/Analysis/FlowSensitive/DataflowAnalysisContextTest.cpp
index f6e8b30d898e892..e624f6cfd0f6b6f 100644
--- a/clang/unittests/Analysis/FlowSensitive/DataflowAnalysisContextTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/DataflowAnalysisContextTest.cpp
@@ -46,6 +46,22 @@ TEST_F(DataflowAnalysisContextTest, AddFlowConditionConstraint) {
   EXPECT_TRUE(Context.flowConditionImplies(FC, C));
 }
 
+TEST_F(DataflowAnalysisContextTest, AddGlobalConstraint) {
+  Atom FC = A.makeFlowConditionToken();
+  auto &C = A.makeAtomRef(A.makeAtom());
+  Context.addGlobalConstraint(C);
+  EXPECT_TRUE(Context.flowConditionImplies(FC, C));
+}
+
+TEST_F(DataflowAnalysisContextTest, GlobalAndFCConstraintInteract) {
+  Atom FC = A.makeFlowConditionToken();
+  auto &C = A.makeAtomRef(A.makeAtom());
+  auto &D = A.makeAtomRef(A.makeAtom());
+  Context.addGlobalConstraint(A.makeImplies(C, D));
+  Context.addFlowConditionConstraint(FC, C);
+  EXPECT_TRUE(Context.flowConditionImplies(FC, D));
+}
+
 TEST_F(DataflowAnalysisContextTest, ForkFlowCondition) {
   Atom FC1 = A.makeFlowConditionToken();
   auto &C1 = A.makeAtomRef(A.makeAtom());

>From 7d71bb1c3a6882205c719e4174263b1f9d4381aa Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall at gmail.com>
Date: Mon, 11 Sep 2023 19:08:12 +0200
Subject: [PATCH 2/4] Tweak comment for global condition

---
 .../Analysis/FlowSensitive/DataflowAnalysisContext.h   | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
index 8ff7e584887241a..4fecf0a0c0019c2 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
@@ -110,10 +110,12 @@ class DataflowAnalysisContext {
 
   /// Adds `Constraint` to current and future flow conditions in this context.
   ///
-  /// This must be used with care: the constraint must be truly global across
-  /// the analysis, and should not invalidate the result of past logic.
-  ///
-  /// Adding constraints on freshly created boolean variables is generally safe.
+  /// The global condition must contain only flow-insensitive information, i.e.
+  /// facts that are true on all paths through the program.
+  /// Information can be added eagerly (when analysis begins), or lazily (e.g.
+  /// when values are first used). The analysis must be careful that the same
+  /// information is added to the global condition regardless of which order
+  /// blocks are analyzed in.
   void addGlobalConstraint(const Formula &Constraint);
 
   /// Adds `Constraint` to the flow condition identified by `Token`.

>From c3f962e72e2d3fda2f88c3c4518aa5e0b617bb23 Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall at gmail.com>
Date: Wed, 13 Sep 2023 11:51:38 +0200
Subject: [PATCH 3/4] Global => Common Drop unused

---
 .../Analysis/FlowSensitive/DataflowAnalysisContext.h      | 6 +++---
 .../Analysis/FlowSensitive/DataflowAnalysisContext.cpp    | 7 ++-----
 .../FlowSensitive/DataflowAnalysisContextTest.cpp         | 8 ++++----
 3 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
index 4fecf0a0c0019c2..42a0a6f4f519295 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
@@ -110,13 +110,13 @@ class DataflowAnalysisContext {
 
   /// Adds `Constraint` to current and future flow conditions in this context.
   ///
-  /// The global condition must contain only flow-insensitive information, i.e.
+  /// The common condition must contain only flow-insensitive information, i.e.
   /// facts that are true on all paths through the program.
   /// Information can be added eagerly (when analysis begins), or lazily (e.g.
   /// when values are first used). The analysis must be careful that the same
-  /// information is added to the global condition regardless of which order
+  /// information is added to the common condition regardless of which order
   /// blocks are analyzed in.
-  void addGlobalConstraint(const Formula &Constraint);
+  void addCommonConstraint(const Formula &Constraint);
 
   /// Adds `Constraint` to the flow condition identified by `Token`.
   void addFlowConditionConstraint(Atom Token, const Formula &Constraint);
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
index f2b3207609801e4..542c2859ddb58c4 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
@@ -104,7 +104,7 @@ DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) {
   return *Res.first->second;
 }
 
-void DataflowAnalysisContext::addGlobalConstraint(const Formula &Constraint) {
+void DataflowAnalysisContext::addCommonConstraint(const Formula &Constraint) {
   if (GlobalConstraints == nullptr)
     GlobalConstraints = &Constraint;
   else
@@ -156,7 +156,6 @@ bool DataflowAnalysisContext::flowConditionImplies(Atom Token,
   llvm::SetVector<const Formula *> Constraints;
   Constraints.insert(&arena().makeAtomRef(Token));
   Constraints.insert(&arena().makeNot(Val));
-  llvm::DenseSet<Atom> VisitedTokens;
   addTransitiveFlowConditionConstraints(Token, Constraints);
   return isUnsatisfiable(std::move(Constraints));
 }
@@ -166,7 +165,6 @@ bool DataflowAnalysisContext::flowConditionIsTautology(Atom Token) {
   // ever be false.
   llvm::SetVector<const Formula *> Constraints;
   Constraints.insert(&arena().makeNot(arena().makeAtomRef(Token)));
-  llvm::DenseSet<Atom> VisitedTokens;
   addTransitiveFlowConditionConstraints(Token, Constraints);
   return isUnsatisfiable(std::move(Constraints));
 }
@@ -203,10 +201,9 @@ void DataflowAnalysisContext::addTransitiveFlowConditionConstraints(
     }
 
     if (auto DepsIt = FlowConditionDeps.find(Token);
-        DepsIt != FlowConditionDeps.end()) {
+        DepsIt != FlowConditionDeps.end())
       for (Atom A : DepsIt->second)
         Remaining.push_back(A);
-    }
   }
 }
 
diff --git a/clang/unittests/Analysis/FlowSensitive/DataflowAnalysisContextTest.cpp b/clang/unittests/Analysis/FlowSensitive/DataflowAnalysisContextTest.cpp
index e624f6cfd0f6b6f..71146f70d2eb297 100644
--- a/clang/unittests/Analysis/FlowSensitive/DataflowAnalysisContextTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/DataflowAnalysisContextTest.cpp
@@ -46,18 +46,18 @@ TEST_F(DataflowAnalysisContextTest, AddFlowConditionConstraint) {
   EXPECT_TRUE(Context.flowConditionImplies(FC, C));
 }
 
-TEST_F(DataflowAnalysisContextTest, AddGlobalConstraint) {
+TEST_F(DataflowAnalysisContextTest, AddCommonConstraint) {
   Atom FC = A.makeFlowConditionToken();
   auto &C = A.makeAtomRef(A.makeAtom());
-  Context.addGlobalConstraint(C);
+  Context.addCommonConstraint(C);
   EXPECT_TRUE(Context.flowConditionImplies(FC, C));
 }
 
-TEST_F(DataflowAnalysisContextTest, GlobalAndFCConstraintInteract) {
+TEST_F(DataflowAnalysisContextTest, CommonAndFCConstraintInteract) {
   Atom FC = A.makeFlowConditionToken();
   auto &C = A.makeAtomRef(A.makeAtom());
   auto &D = A.makeAtomRef(A.makeAtom());
-  Context.addGlobalConstraint(A.makeImplies(C, D));
+  Context.addCommonConstraint(A.makeImplies(C, D));
   Context.addFlowConditionConstraint(FC, C);
   EXPECT_TRUE(Context.flowConditionImplies(FC, D));
 }

>From e847f34b8cff493821cabb2addcd248a47f18cb5 Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall at gmail.com>
Date: Thu, 14 Sep 2023 15:00:34 +0200
Subject: [PATCH 4/4] Common => Invariant

---
 .../Analysis/FlowSensitive/DataflowAnalysisContext.h | 11 +++++------
 .../FlowSensitive/DataflowAnalysisContext.cpp        | 12 ++++++------
 .../FlowSensitive/DataflowAnalysisContextTest.cpp    |  8 ++++----
 3 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
index 42a0a6f4f519295..4698f0616e66e82 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
@@ -110,13 +110,12 @@ class DataflowAnalysisContext {
 
   /// Adds `Constraint` to current and future flow conditions in this context.
   ///
-  /// The common condition must contain only flow-insensitive information, i.e.
-  /// facts that are true on all paths through the program.
+  /// Invariants must contain only flow-insensitive information, i.e. facts that
+  /// are true on all paths through the program.
   /// Information can be added eagerly (when analysis begins), or lazily (e.g.
   /// when values are first used). The analysis must be careful that the same
-  /// information is added to the common condition regardless of which order
-  /// blocks are analyzed in.
-  void addCommonConstraint(const Formula &Constraint);
+  /// information is added regardless of which order blocks are analyzed in.
+  void addInvariant(const Formula &Constraint);
 
   /// Adds `Constraint` to the flow condition identified by `Token`.
   void addFlowConditionConstraint(Atom Token, const Formula &Constraint);
@@ -233,7 +232,7 @@ class DataflowAnalysisContext {
   // dependencies is stored in the `FlowConditionDeps` map.
   llvm::DenseMap<Atom, llvm::DenseSet<Atom>> FlowConditionDeps;
   llvm::DenseMap<Atom, const Formula *> FlowConditionConstraints;
-  const Formula *GlobalConstraints = nullptr;
+  const Formula *Invariant = nullptr;
 
   llvm::DenseMap<const FunctionDecl *, ControlFlowContext> FunctionContexts;
 
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
index 542c2859ddb58c4..e81048ce9233808 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp
@@ -104,11 +104,11 @@ DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) {
   return *Res.first->second;
 }
 
-void DataflowAnalysisContext::addCommonConstraint(const Formula &Constraint) {
-  if (GlobalConstraints == nullptr)
-    GlobalConstraints = &Constraint;
+void DataflowAnalysisContext::addInvariant(const Formula &Constraint) {
+  if (Invariant == nullptr)
+    Invariant = &Constraint;
   else
-    GlobalConstraints = &arena().makeAnd(*GlobalConstraints, Constraint);
+    Invariant = &arena().makeAnd(*Invariant, Constraint);
 }
 
 void DataflowAnalysisContext::addFlowConditionConstraint(
@@ -181,8 +181,8 @@ void DataflowAnalysisContext::addTransitiveFlowConditionConstraints(
   llvm::DenseSet<Atom> AddedTokens;
   std::vector<Atom> Remaining = {Token};
 
-  if (GlobalConstraints)
-    Constraints.insert(GlobalConstraints);
+  if (Invariant)
+    Constraints.insert(Invariant);
   // Define all the flow conditions that might be referenced in constraints.
   while (!Remaining.empty()) {
     auto Token = Remaining.back();
diff --git a/clang/unittests/Analysis/FlowSensitive/DataflowAnalysisContextTest.cpp b/clang/unittests/Analysis/FlowSensitive/DataflowAnalysisContextTest.cpp
index 71146f70d2eb297..fb7642c131e3190 100644
--- a/clang/unittests/Analysis/FlowSensitive/DataflowAnalysisContextTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/DataflowAnalysisContextTest.cpp
@@ -46,18 +46,18 @@ TEST_F(DataflowAnalysisContextTest, AddFlowConditionConstraint) {
   EXPECT_TRUE(Context.flowConditionImplies(FC, C));
 }
 
-TEST_F(DataflowAnalysisContextTest, AddCommonConstraint) {
+TEST_F(DataflowAnalysisContextTest, AddInvariant) {
   Atom FC = A.makeFlowConditionToken();
   auto &C = A.makeAtomRef(A.makeAtom());
-  Context.addCommonConstraint(C);
+  Context.addInvariant(C);
   EXPECT_TRUE(Context.flowConditionImplies(FC, C));
 }
 
-TEST_F(DataflowAnalysisContextTest, CommonAndFCConstraintInteract) {
+TEST_F(DataflowAnalysisContextTest, InvariantAndFCConstraintInteract) {
   Atom FC = A.makeFlowConditionToken();
   auto &C = A.makeAtomRef(A.makeAtom());
   auto &D = A.makeAtomRef(A.makeAtom());
-  Context.addCommonConstraint(A.makeImplies(C, D));
+  Context.addInvariant(A.makeImplies(C, D));
   Context.addFlowConditionConstraint(FC, C);
   EXPECT_TRUE(Context.flowConditionImplies(FC, D));
 }



More information about the cfe-commits mailing list