[clang] [clang][dataflow] Add new `join` API and replace existing `merge` implementations. (PR #80361)

Yitzhak Mandelbaum via cfe-commits cfe-commits at lists.llvm.org
Thu Feb 1 15:07:12 PST 2024


https://github.com/ymand updated https://github.com/llvm/llvm-project/pull/80361

>From 660d1afbda79416beb6f373e6252670f912a5181 Mon Sep 17 00:00:00 2001
From: Yitzhak Mandelbaum <yitzhakm at google.com>
Date: Tue, 30 Jan 2024 16:02:21 +0000
Subject: [PATCH] Add new `join` API and replace existing `merge`
 implementations.

This patch adds a new interface for the join operation, now properly called
`join`. Originally, the framework offered a single `merge` operation, which
could serve either as a join or a widening. In practice, though we found this
conflation didn't work for non-trivial anlyses, and split of the widening
operation (`widen`). This change completes the transition by introducing a
proper `join` with strict join semantics.

In the process, it drops an odd (and often misused) aspect of `merge` wherein
callees could implictly instruct the framework to drop the current entry by
returning `false`. This features was never used correctly in analyses and
doesn't belong in a join operation, so it is omitted.
---
 .../FlowSensitive/DataflowEnvironment.h       | 25 +++++++-
 .../FlowSensitive/DataflowEnvironment.cpp     | 59 +++++++++----------
 .../FlowSensitive/SignAnalysisTest.cpp        | 58 +++++++++---------
 .../TypeErasedDataflowAnalysisTest.cpp        | 29 ++++-----
 4 files changed, 92 insertions(+), 79 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
index 1543f900e401d..79100cccafccf 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -19,7 +19,6 @@
 #include "clang/AST/DeclBase.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/Type.h"
-#include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
 #include "clang/Analysis/FlowSensitive/Formula.h"
@@ -31,7 +30,6 @@
 #include "llvm/ADT/MapVector.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
-#include <memory>
 #include <type_traits>
 #include <utility>
 
@@ -81,6 +79,8 @@ class Environment {
       return ComparisonResult::Unknown;
     }
 
+    /// DEPRECATED. Override `join` and/or `widen`, instead.
+    ///
     /// Modifies `MergedVal` to approximate both `Val1` and `Val2`. This could
     /// be a strict lattice join or a more general widening operation.
     ///
@@ -105,6 +105,27 @@ class Environment {
       return true;
     }
 
+    /// Modifies `JoinedVal` to approximate both `Val1` and `Val2`. This should
+    /// obey the properties of a lattice join.
+    ///
+    /// `Env1` and `Env2` can be used to query child values and path condition
+    /// implications of `Val1` and `Val2` respectively.
+    ///
+    /// Requirements:
+    ///
+    ///  `Val1` and `Val2` must be distinct.
+    ///
+    ///  `Val1`, `Val2`, and `JoinedVal` must model values of type `Type`.
+    ///
+    ///  `Val1` and `Val2` must be assigned to the same storage location in
+    ///  `Env1` and `Env2` respectively.
+    virtual void join(QualType Type, const Value &Val1, const Environment &Env1,
+                      const Value &Val2, const Environment &Env2,
+                      Value &JoinedVal, Environment &JoinedEnv) {
+      assert(merge(Type, Val1, Env1, Val2, Env2, JoinedVal, JoinedEnv) &&
+             "dropping merged value is unsupported");
+    }
+
     /// This function may widen the current value -- replace it with an
     /// approximation that can reach a fixed point more quickly than iterated
     /// application of the transfer function alone. The previous value is
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index 01db65866d135..24811ded970e8 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -86,15 +86,15 @@ static bool compareDistinctValues(QualType Type, Value &Val1,
   llvm_unreachable("All cases covered in switch");
 }
 
-/// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`,
-/// respectively, of the same type `Type`. Merging generally produces a single
+/// Attempts to join distinct values `Val1` and `Val2` in `Env1` and `Env2`,
+/// respectively, of the same type `Type`. Joining generally produces a single
 /// value that (soundly) approximates the two inputs, although the actual
 /// meaning depends on `Model`.
-static Value *mergeDistinctValues(QualType Type, Value &Val1,
-                                  const Environment &Env1, Value &Val2,
-                                  const Environment &Env2,
-                                  Environment &MergedEnv,
-                                  Environment::ValueModel &Model) {
+static Value *joinDistinctValues(QualType Type, Value &Val1,
+                                 const Environment &Env1, Value &Val2,
+                                 const Environment &Env2,
+                                 Environment &JoinedEnv,
+                                 Environment::ValueModel &Model) {
   // Join distinct boolean values preserving information about the constraints
   // in the respective path conditions.
   if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) {
@@ -113,17 +113,17 @@ static Value *mergeDistinctValues(QualType Type, Value &Val1,
     // ```
     auto &Expr1 = cast<BoolValue>(Val1).formula();
     auto &Expr2 = cast<BoolValue>(Val2).formula();
-    auto &A = MergedEnv.arena();
-    auto &MergedVal = A.makeAtomRef(A.makeAtom());
-    MergedEnv.assume(
+    auto &A = JoinedEnv.arena();
+    auto &JoinedVal = A.makeAtomRef(A.makeAtom());
+    JoinedEnv.assume(
         A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()),
-                           A.makeEquals(MergedVal, Expr1)),
+                           A.makeEquals(JoinedVal, Expr1)),
                  A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()),
-                           A.makeEquals(MergedVal, Expr2))));
-    return &A.makeBoolValue(MergedVal);
+                           A.makeEquals(JoinedVal, Expr2))));
+    return &A.makeBoolValue(JoinedVal);
   }
 
-  Value *MergedVal = nullptr;
+  Value *JoinedVal = nullptr;
   if (auto *RecordVal1 = dyn_cast<RecordValue>(&Val1)) {
     auto *RecordVal2 = cast<RecordValue>(&Val2);
 
@@ -131,24 +131,21 @@ static Value *mergeDistinctValues(QualType Type, Value &Val1,
       // `RecordVal1` and `RecordVal2` may have different properties associated
       // with them. Create a new `RecordValue` with the same location but
       // without any properties so that we soundly approximate both values. If a
-      // particular analysis needs to merge properties, it should do so in
-      // `DataflowAnalysis::merge()`.
-      MergedVal = &MergedEnv.create<RecordValue>(RecordVal1->getLoc());
+      // particular analysis needs to join properties, it should do so in
+      // `DataflowAnalysis::join()`.
+      JoinedVal = &JoinedEnv.create<RecordValue>(RecordVal1->getLoc());
     else
       // If the locations for the two records are different, need to create a
       // completely new value.
-      MergedVal = MergedEnv.createValue(Type);
+      JoinedVal = JoinedEnv.createValue(Type);
   } else {
-    MergedVal = MergedEnv.createValue(Type);
+    JoinedVal = JoinedEnv.createValue(Type);
   }
 
-  // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge`
-  // returns false to avoid storing unneeded values in `DACtx`.
-  if (MergedVal)
-    if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv))
-      return MergedVal;
+  if (JoinedVal)
+    Model.join(Type, Val1, Env1, Val2, Env2, *JoinedVal, JoinedEnv);
 
-  return nullptr;
+  return JoinedVal;
 }
 
 // When widening does not change `Current`, return value will equal `&Prev`.
@@ -240,9 +237,9 @@ joinLocToVal(const llvm::MapVector<const StorageLocation *, Value *> &LocToVal,
       continue;
     }
 
-    if (Value *MergedVal = mergeDistinctValues(
+    if (Value *JoinedVal = joinDistinctValues(
             Loc->getType(), *Val, Env1, *It->second, Env2, JoinedEnv, Model)) {
-      Result.insert({Loc, MergedVal});
+      Result.insert({Loc, JoinedVal});
     }
   }
 
@@ -657,10 +654,10 @@ Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
     // cast.
     auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back());
     assert(Func != nullptr);
-    if (Value *MergedVal =
-            mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA,
-                                *EnvB.ReturnVal, EnvB, JoinedEnv, Model))
-      JoinedEnv.ReturnVal = MergedVal;
+    if (Value *JoinedVal =
+            joinDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA,
+                               *EnvB.ReturnVal, EnvB, JoinedEnv, Model))
+      JoinedEnv.ReturnVal = JoinedVal;
   }
 
   if (EnvA.ReturnLoc == EnvB.ReturnLoc)
diff --git a/clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp
index a6f4c45504fa6..b8fc528dbdce0 100644
--- a/clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp
@@ -364,18 +364,17 @@ class SignPropagationAnalysis
     LatticeTransferState State(L, Env);
     TransferMatchSwitch(Elt, getASTContext(), State);
   }
-  bool merge(QualType Type, const Value &Val1, const Environment &Env1,
-             const Value &Val2, const Environment &Env2, Value &MergedVal,
-             Environment &MergedEnv) override;
+  void join(QualType Type, const Value &Val1, const Environment &Env1,
+            const Value &Val2, const Environment &Env2, Value &MergedVal,
+            Environment &MergedEnv) override;
 
 private:
   CFGMatchSwitch<TransferState<NoopLattice>> TransferMatchSwitch;
 };
 
-// Copied from crubit.
-BoolValue &mergeBoolValues(BoolValue &Bool1, const Environment &Env1,
-                           BoolValue &Bool2, const Environment &Env2,
-                           Environment &MergedEnv) {
+BoolValue &joinBoolValues(BoolValue &Bool1, const Environment &Env1,
+                          BoolValue &Bool2, const Environment &Env2,
+                          Environment &JoinedEnv) {
   if (&Bool1 == &Bool2) {
     return Bool1;
   }
@@ -383,41 +382,40 @@ BoolValue &mergeBoolValues(BoolValue &Bool1, const Environment &Env1,
   auto &B1 = Bool1.formula();
   auto &B2 = Bool2.formula();
 
-  auto &A = MergedEnv.arena();
-  auto &MergedBool = MergedEnv.makeAtomicBoolValue();
+  auto &A = JoinedEnv.arena();
+  auto &JoinedBool = JoinedEnv.makeAtomicBoolValue();
 
   // If `Bool1` and `Bool2` is constrained to the same true / false value,
-  // `MergedBool` can be constrained similarly without needing to consider the
-  // path taken - this simplifies the flow condition tracked in `MergedEnv`.
+  // `JoinedBool` can be constrained similarly without needing to consider the
+  // path taken - this simplifies the flow condition tracked in `JoinedEnv`.
   // Otherwise, information about which path was taken is used to associate
-  // `MergedBool` with `Bool1` and `Bool2`.
+  // `JoinedBool` with `Bool1` and `Bool2`.
   if (Env1.proves(B1) && Env2.proves(B2)) {
-    MergedEnv.assume(MergedBool.formula());
+    JoinedEnv.assume(JoinedBool.formula());
   } else if (Env1.proves(A.makeNot(B1)) && Env2.proves(A.makeNot(B2))) {
-    MergedEnv.assume(A.makeNot(MergedBool.formula()));
+    JoinedEnv.assume(A.makeNot(JoinedBool.formula()));
   }
-  return MergedBool;
+  return JoinedBool;
 }
 
-bool SignPropagationAnalysis::merge(QualType Type, const Value &Val1,
-                                    const Environment &Env1, const Value &Val2,
-                                    const Environment &Env2, Value &MergedVal,
-                                    Environment &MergedEnv) {
+void SignPropagationAnalysis::join(QualType Type, const Value &Val1,
+                                   const Environment &Env1, const Value &Val2,
+                                   const Environment &Env2, Value &JoinedVal,
+                                   Environment &JoinedEnv) {
   if (!Type->isIntegerType())
-    return false;
+    return;
   SignProperties Ps1 = getSignProperties(Val1, Env1);
   SignProperties Ps2 = getSignProperties(Val2, Env2);
   if (!Ps1.Neg || !Ps2.Neg)
-    return false;
-  BoolValue &MergedNeg =
-      mergeBoolValues(*Ps1.Neg, Env1, *Ps2.Neg, Env2, MergedEnv);
-  BoolValue &MergedZero =
-      mergeBoolValues(*Ps1.Zero, Env1, *Ps2.Zero, Env2, MergedEnv);
-  BoolValue &MergedPos =
-      mergeBoolValues(*Ps1.Pos, Env1, *Ps2.Pos, Env2, MergedEnv);
-  setSignProperties(MergedVal,
-                    SignProperties{&MergedNeg, &MergedZero, &MergedPos});
-  return true;
+    return;
+  BoolValue &JoinedNeg =
+      joinBoolValues(*Ps1.Neg, Env1, *Ps2.Neg, Env2, JoinedEnv);
+  BoolValue &JoinedZero =
+      joinBoolValues(*Ps1.Zero, Env1, *Ps2.Zero, Env2, JoinedEnv);
+  BoolValue &JoinedPos =
+      joinBoolValues(*Ps1.Pos, Env1, *Ps2.Pos, Env2, JoinedEnv);
+  setSignProperties(JoinedVal,
+                    SignProperties{&JoinedNeg, &JoinedZero, &JoinedPos});
 }
 
 template <typename Matcher>
diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
index 466d33358fd38..3bca9cced8d6f 100644
--- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
@@ -672,26 +672,23 @@ class NullPointerAnalysis final
                                                : ComparisonResult::Different;
   }
 
-  bool merge(QualType Type, const Value &Val1, const Environment &Env1,
-             const Value &Val2, const Environment &Env2, Value &MergedVal,
-             Environment &MergedEnv) override {
-    // Nothing to say about a value that is not a pointer.
+  void join(QualType Type, const Value &Val1, const Environment &Env1,
+            const Value &Val2, const Environment &Env2, Value &JoinedVal,
+            Environment &JoinedEnv) override {
+    // Nothing to say about a value that is not a pointer...
     if (!Type->isPointerType())
-      return false;
+      return;
 
+    // ... or, a pointer without the `is_null` property.
     auto *IsNull1 = cast_or_null<BoolValue>(Val1.getProperty("is_null"));
-    if (IsNull1 == nullptr)
-      return false;
-
     auto *IsNull2 = cast_or_null<BoolValue>(Val2.getProperty("is_null"));
-    if (IsNull2 == nullptr)
-      return false;
+    if (IsNull1 == nullptr || IsNull2 == nullptr)
+      return;
 
     if (IsNull1 == IsNull2)
-      MergedVal.setProperty("is_null", *IsNull1);
+      JoinedVal.setProperty("is_null", *IsNull1);
     else
-      MergedVal.setProperty("is_null", MergedEnv.makeTopBoolValue());
-    return true;
+      JoinedVal.setProperty("is_null", JoinedEnv.makeTopBoolValue());
   }
 };
 
@@ -1176,7 +1173,7 @@ TEST_F(FlowConditionTest, Join) {
 // Note: currently, arbitrary function calls are uninterpreted, so the test
 // exercises this case. If and when we change that, this test will not add to
 // coverage (although it may still test a valuable case).
-TEST_F(FlowConditionTest, OpaqueFlowConditionMergesToOpaqueBool) {
+TEST_F(FlowConditionTest, OpaqueFlowConditionJoinsToOpaqueBool) {
   std::string Code = R"(
     bool foo();
 
@@ -1211,7 +1208,7 @@ TEST_F(FlowConditionTest, OpaqueFlowConditionMergesToOpaqueBool) {
 // the first instance), so the test exercises this case. If and when we change
 // that, this test will not add to coverage (although it may still test a
 // valuable case).
-TEST_F(FlowConditionTest, OpaqueFieldFlowConditionMergesToOpaqueBool) {
+TEST_F(FlowConditionTest, OpaqueFieldFlowConditionJoinsToOpaqueBool) {
   std::string Code = R"(
     struct Rec {
       Rec* Next;
@@ -1249,7 +1246,7 @@ TEST_F(FlowConditionTest, OpaqueFieldFlowConditionMergesToOpaqueBool) {
 // condition is not meaningfully interpreted. Adds to above by nesting the
 // interestnig case inside a normal branch. This protects against degenerate
 // solutions which only test for empty flow conditions, for example.
-TEST_F(FlowConditionTest, OpaqueFlowConditionInsideBranchMergesToOpaqueBool) {
+TEST_F(FlowConditionTest, OpaqueFlowConditionInsideBranchJoinsToOpaqueBool) {
   std::string Code = R"(
     bool foo();
 



More information about the cfe-commits mailing list