[clang] [clang][dataflow] Add new `join` API and replace existing `merge` implementations. (PR #80361)
Yitzhak Mandelbaum via cfe-commits
cfe-commits at lists.llvm.org
Thu Feb 1 15:07:12 PST 2024
https://github.com/ymand updated https://github.com/llvm/llvm-project/pull/80361
>From 660d1afbda79416beb6f373e6252670f912a5181 Mon Sep 17 00:00:00 2001
From: Yitzhak Mandelbaum <yitzhakm at google.com>
Date: Tue, 30 Jan 2024 16:02:21 +0000
Subject: [PATCH] Add new `join` API and replace existing `merge`
implementations.
This patch adds a new interface for the join operation, now properly called
`join`. Originally, the framework offered a single `merge` operation, which
could serve either as a join or a widening. In practice, though we found this
conflation didn't work for non-trivial anlyses, and split of the widening
operation (`widen`). This change completes the transition by introducing a
proper `join` with strict join semantics.
In the process, it drops an odd (and often misused) aspect of `merge` wherein
callees could implictly instruct the framework to drop the current entry by
returning `false`. This features was never used correctly in analyses and
doesn't belong in a join operation, so it is omitted.
---
.../FlowSensitive/DataflowEnvironment.h | 25 +++++++-
.../FlowSensitive/DataflowEnvironment.cpp | 59 +++++++++----------
.../FlowSensitive/SignAnalysisTest.cpp | 58 +++++++++---------
.../TypeErasedDataflowAnalysisTest.cpp | 29 ++++-----
4 files changed, 92 insertions(+), 79 deletions(-)
diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
index 1543f900e401d..79100cccafccf 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -19,7 +19,6 @@
#include "clang/AST/DeclBase.h"
#include "clang/AST/Expr.h"
#include "clang/AST/Type.h"
-#include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
#include "clang/Analysis/FlowSensitive/DataflowLattice.h"
#include "clang/Analysis/FlowSensitive/Formula.h"
@@ -31,7 +30,6 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include <memory>
#include <type_traits>
#include <utility>
@@ -81,6 +79,8 @@ class Environment {
return ComparisonResult::Unknown;
}
+ /// DEPRECATED. Override `join` and/or `widen`, instead.
+ ///
/// Modifies `MergedVal` to approximate both `Val1` and `Val2`. This could
/// be a strict lattice join or a more general widening operation.
///
@@ -105,6 +105,27 @@ class Environment {
return true;
}
+ /// Modifies `JoinedVal` to approximate both `Val1` and `Val2`. This should
+ /// obey the properties of a lattice join.
+ ///
+ /// `Env1` and `Env2` can be used to query child values and path condition
+ /// implications of `Val1` and `Val2` respectively.
+ ///
+ /// Requirements:
+ ///
+ /// `Val1` and `Val2` must be distinct.
+ ///
+ /// `Val1`, `Val2`, and `JoinedVal` must model values of type `Type`.
+ ///
+ /// `Val1` and `Val2` must be assigned to the same storage location in
+ /// `Env1` and `Env2` respectively.
+ virtual void join(QualType Type, const Value &Val1, const Environment &Env1,
+ const Value &Val2, const Environment &Env2,
+ Value &JoinedVal, Environment &JoinedEnv) {
+ assert(merge(Type, Val1, Env1, Val2, Env2, JoinedVal, JoinedEnv) &&
+ "dropping merged value is unsupported");
+ }
+
/// This function may widen the current value -- replace it with an
/// approximation that can reach a fixed point more quickly than iterated
/// application of the transfer function alone. The previous value is
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index 01db65866d135..24811ded970e8 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -86,15 +86,15 @@ static bool compareDistinctValues(QualType Type, Value &Val1,
llvm_unreachable("All cases covered in switch");
}
-/// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`,
-/// respectively, of the same type `Type`. Merging generally produces a single
+/// Attempts to join distinct values `Val1` and `Val2` in `Env1` and `Env2`,
+/// respectively, of the same type `Type`. Joining generally produces a single
/// value that (soundly) approximates the two inputs, although the actual
/// meaning depends on `Model`.
-static Value *mergeDistinctValues(QualType Type, Value &Val1,
- const Environment &Env1, Value &Val2,
- const Environment &Env2,
- Environment &MergedEnv,
- Environment::ValueModel &Model) {
+static Value *joinDistinctValues(QualType Type, Value &Val1,
+ const Environment &Env1, Value &Val2,
+ const Environment &Env2,
+ Environment &JoinedEnv,
+ Environment::ValueModel &Model) {
// Join distinct boolean values preserving information about the constraints
// in the respective path conditions.
if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) {
@@ -113,17 +113,17 @@ static Value *mergeDistinctValues(QualType Type, Value &Val1,
// ```
auto &Expr1 = cast<BoolValue>(Val1).formula();
auto &Expr2 = cast<BoolValue>(Val2).formula();
- auto &A = MergedEnv.arena();
- auto &MergedVal = A.makeAtomRef(A.makeAtom());
- MergedEnv.assume(
+ auto &A = JoinedEnv.arena();
+ auto &JoinedVal = A.makeAtomRef(A.makeAtom());
+ JoinedEnv.assume(
A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()),
- A.makeEquals(MergedVal, Expr1)),
+ A.makeEquals(JoinedVal, Expr1)),
A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()),
- A.makeEquals(MergedVal, Expr2))));
- return &A.makeBoolValue(MergedVal);
+ A.makeEquals(JoinedVal, Expr2))));
+ return &A.makeBoolValue(JoinedVal);
}
- Value *MergedVal = nullptr;
+ Value *JoinedVal = nullptr;
if (auto *RecordVal1 = dyn_cast<RecordValue>(&Val1)) {
auto *RecordVal2 = cast<RecordValue>(&Val2);
@@ -131,24 +131,21 @@ static Value *mergeDistinctValues(QualType Type, Value &Val1,
// `RecordVal1` and `RecordVal2` may have different properties associated
// with them. Create a new `RecordValue` with the same location but
// without any properties so that we soundly approximate both values. If a
- // particular analysis needs to merge properties, it should do so in
- // `DataflowAnalysis::merge()`.
- MergedVal = &MergedEnv.create<RecordValue>(RecordVal1->getLoc());
+ // particular analysis needs to join properties, it should do so in
+ // `DataflowAnalysis::join()`.
+ JoinedVal = &JoinedEnv.create<RecordValue>(RecordVal1->getLoc());
else
// If the locations for the two records are different, need to create a
// completely new value.
- MergedVal = MergedEnv.createValue(Type);
+ JoinedVal = JoinedEnv.createValue(Type);
} else {
- MergedVal = MergedEnv.createValue(Type);
+ JoinedVal = JoinedEnv.createValue(Type);
}
- // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge`
- // returns false to avoid storing unneeded values in `DACtx`.
- if (MergedVal)
- if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv))
- return MergedVal;
+ if (JoinedVal)
+ Model.join(Type, Val1, Env1, Val2, Env2, *JoinedVal, JoinedEnv);
- return nullptr;
+ return JoinedVal;
}
// When widening does not change `Current`, return value will equal `&Prev`.
@@ -240,9 +237,9 @@ joinLocToVal(const llvm::MapVector<const StorageLocation *, Value *> &LocToVal,
continue;
}
- if (Value *MergedVal = mergeDistinctValues(
+ if (Value *JoinedVal = joinDistinctValues(
Loc->getType(), *Val, Env1, *It->second, Env2, JoinedEnv, Model)) {
- Result.insert({Loc, MergedVal});
+ Result.insert({Loc, JoinedVal});
}
}
@@ -657,10 +654,10 @@ Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
// cast.
auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back());
assert(Func != nullptr);
- if (Value *MergedVal =
- mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA,
- *EnvB.ReturnVal, EnvB, JoinedEnv, Model))
- JoinedEnv.ReturnVal = MergedVal;
+ if (Value *JoinedVal =
+ joinDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA,
+ *EnvB.ReturnVal, EnvB, JoinedEnv, Model))
+ JoinedEnv.ReturnVal = JoinedVal;
}
if (EnvA.ReturnLoc == EnvB.ReturnLoc)
diff --git a/clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp
index a6f4c45504fa6..b8fc528dbdce0 100644
--- a/clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/SignAnalysisTest.cpp
@@ -364,18 +364,17 @@ class SignPropagationAnalysis
LatticeTransferState State(L, Env);
TransferMatchSwitch(Elt, getASTContext(), State);
}
- bool merge(QualType Type, const Value &Val1, const Environment &Env1,
- const Value &Val2, const Environment &Env2, Value &MergedVal,
- Environment &MergedEnv) override;
+ void join(QualType Type, const Value &Val1, const Environment &Env1,
+ const Value &Val2, const Environment &Env2, Value &MergedVal,
+ Environment &MergedEnv) override;
private:
CFGMatchSwitch<TransferState<NoopLattice>> TransferMatchSwitch;
};
-// Copied from crubit.
-BoolValue &mergeBoolValues(BoolValue &Bool1, const Environment &Env1,
- BoolValue &Bool2, const Environment &Env2,
- Environment &MergedEnv) {
+BoolValue &joinBoolValues(BoolValue &Bool1, const Environment &Env1,
+ BoolValue &Bool2, const Environment &Env2,
+ Environment &JoinedEnv) {
if (&Bool1 == &Bool2) {
return Bool1;
}
@@ -383,41 +382,40 @@ BoolValue &mergeBoolValues(BoolValue &Bool1, const Environment &Env1,
auto &B1 = Bool1.formula();
auto &B2 = Bool2.formula();
- auto &A = MergedEnv.arena();
- auto &MergedBool = MergedEnv.makeAtomicBoolValue();
+ auto &A = JoinedEnv.arena();
+ auto &JoinedBool = JoinedEnv.makeAtomicBoolValue();
// If `Bool1` and `Bool2` is constrained to the same true / false value,
- // `MergedBool` can be constrained similarly without needing to consider the
- // path taken - this simplifies the flow condition tracked in `MergedEnv`.
+ // `JoinedBool` can be constrained similarly without needing to consider the
+ // path taken - this simplifies the flow condition tracked in `JoinedEnv`.
// Otherwise, information about which path was taken is used to associate
- // `MergedBool` with `Bool1` and `Bool2`.
+ // `JoinedBool` with `Bool1` and `Bool2`.
if (Env1.proves(B1) && Env2.proves(B2)) {
- MergedEnv.assume(MergedBool.formula());
+ JoinedEnv.assume(JoinedBool.formula());
} else if (Env1.proves(A.makeNot(B1)) && Env2.proves(A.makeNot(B2))) {
- MergedEnv.assume(A.makeNot(MergedBool.formula()));
+ JoinedEnv.assume(A.makeNot(JoinedBool.formula()));
}
- return MergedBool;
+ return JoinedBool;
}
-bool SignPropagationAnalysis::merge(QualType Type, const Value &Val1,
- const Environment &Env1, const Value &Val2,
- const Environment &Env2, Value &MergedVal,
- Environment &MergedEnv) {
+void SignPropagationAnalysis::join(QualType Type, const Value &Val1,
+ const Environment &Env1, const Value &Val2,
+ const Environment &Env2, Value &JoinedVal,
+ Environment &JoinedEnv) {
if (!Type->isIntegerType())
- return false;
+ return;
SignProperties Ps1 = getSignProperties(Val1, Env1);
SignProperties Ps2 = getSignProperties(Val2, Env2);
if (!Ps1.Neg || !Ps2.Neg)
- return false;
- BoolValue &MergedNeg =
- mergeBoolValues(*Ps1.Neg, Env1, *Ps2.Neg, Env2, MergedEnv);
- BoolValue &MergedZero =
- mergeBoolValues(*Ps1.Zero, Env1, *Ps2.Zero, Env2, MergedEnv);
- BoolValue &MergedPos =
- mergeBoolValues(*Ps1.Pos, Env1, *Ps2.Pos, Env2, MergedEnv);
- setSignProperties(MergedVal,
- SignProperties{&MergedNeg, &MergedZero, &MergedPos});
- return true;
+ return;
+ BoolValue &JoinedNeg =
+ joinBoolValues(*Ps1.Neg, Env1, *Ps2.Neg, Env2, JoinedEnv);
+ BoolValue &JoinedZero =
+ joinBoolValues(*Ps1.Zero, Env1, *Ps2.Zero, Env2, JoinedEnv);
+ BoolValue &JoinedPos =
+ joinBoolValues(*Ps1.Pos, Env1, *Ps2.Pos, Env2, JoinedEnv);
+ setSignProperties(JoinedVal,
+ SignProperties{&JoinedNeg, &JoinedZero, &JoinedPos});
}
template <typename Matcher>
diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
index 466d33358fd38..3bca9cced8d6f 100644
--- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
@@ -672,26 +672,23 @@ class NullPointerAnalysis final
: ComparisonResult::Different;
}
- bool merge(QualType Type, const Value &Val1, const Environment &Env1,
- const Value &Val2, const Environment &Env2, Value &MergedVal,
- Environment &MergedEnv) override {
- // Nothing to say about a value that is not a pointer.
+ void join(QualType Type, const Value &Val1, const Environment &Env1,
+ const Value &Val2, const Environment &Env2, Value &JoinedVal,
+ Environment &JoinedEnv) override {
+ // Nothing to say about a value that is not a pointer...
if (!Type->isPointerType())
- return false;
+ return;
+ // ... or, a pointer without the `is_null` property.
auto *IsNull1 = cast_or_null<BoolValue>(Val1.getProperty("is_null"));
- if (IsNull1 == nullptr)
- return false;
-
auto *IsNull2 = cast_or_null<BoolValue>(Val2.getProperty("is_null"));
- if (IsNull2 == nullptr)
- return false;
+ if (IsNull1 == nullptr || IsNull2 == nullptr)
+ return;
if (IsNull1 == IsNull2)
- MergedVal.setProperty("is_null", *IsNull1);
+ JoinedVal.setProperty("is_null", *IsNull1);
else
- MergedVal.setProperty("is_null", MergedEnv.makeTopBoolValue());
- return true;
+ JoinedVal.setProperty("is_null", JoinedEnv.makeTopBoolValue());
}
};
@@ -1176,7 +1173,7 @@ TEST_F(FlowConditionTest, Join) {
// Note: currently, arbitrary function calls are uninterpreted, so the test
// exercises this case. If and when we change that, this test will not add to
// coverage (although it may still test a valuable case).
-TEST_F(FlowConditionTest, OpaqueFlowConditionMergesToOpaqueBool) {
+TEST_F(FlowConditionTest, OpaqueFlowConditionJoinsToOpaqueBool) {
std::string Code = R"(
bool foo();
@@ -1211,7 +1208,7 @@ TEST_F(FlowConditionTest, OpaqueFlowConditionMergesToOpaqueBool) {
// the first instance), so the test exercises this case. If and when we change
// that, this test will not add to coverage (although it may still test a
// valuable case).
-TEST_F(FlowConditionTest, OpaqueFieldFlowConditionMergesToOpaqueBool) {
+TEST_F(FlowConditionTest, OpaqueFieldFlowConditionJoinsToOpaqueBool) {
std::string Code = R"(
struct Rec {
Rec* Next;
@@ -1249,7 +1246,7 @@ TEST_F(FlowConditionTest, OpaqueFieldFlowConditionMergesToOpaqueBool) {
// condition is not meaningfully interpreted. Adds to above by nesting the
// interestnig case inside a normal branch. This protects against degenerate
// solutions which only test for empty flow conditions, for example.
-TEST_F(FlowConditionTest, OpaqueFlowConditionInsideBranchMergesToOpaqueBool) {
+TEST_F(FlowConditionTest, OpaqueFlowConditionInsideBranchJoinsToOpaqueBool) {
std::string Code = R"(
bool foo();
More information about the cfe-commits
mailing list