[clang] 6441358 - [clang][dataflow] Add support for return values of reference type.
Martin Braenne via cfe-commits
cfe-commits at lists.llvm.org
Thu May 25 01:38:42 PDT 2023
Author: Martin Braenne
Date: 2023-05-25T08:38:33Z
New Revision: 64413584dacba1fccffa345f69043b3509ee1745
URL: https://github.com/llvm/llvm-project/commit/64413584dacba1fccffa345f69043b3509ee1745
DIFF: https://github.com/llvm/llvm-project/commit/64413584dacba1fccffa345f69043b3509ee1745.diff
LOG: [clang][dataflow] Add support for return values of reference type.
This patch changes the way `Environment::ReturnLoc` is set: Whereas previously it was set by the caller, it is now set by the callee (obviously, as we otherwise would not be able to return references).
The patch also introduces `Environment::ReturnVal`, which is used for non-reference-type return values. This allows these to be handled with the correct value category semantics; see also https://discourse.llvm.org/t/70086, which describes the ongoing migration to strict value category semantics.
Depends On D150776
Reviewed By: ymandel, xazax.hun
Differential Revision: https://reviews.llvm.org/D151194
Added:
Modified:
clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
clang/lib/Analysis/FlowSensitive/Transfer.cpp
clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
index 6f2f7f4004f84..a31f5ec116425 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -192,7 +192,8 @@ class Environment {
/// Moves gathered information back into `this` from a `CalleeEnv` created via
/// `pushCall`.
- void popCall(const Environment &CalleeEnv);
+ void popCall(const CallExpr *Call, const Environment &CalleeEnv);
+ void popCall(const CXXConstructExpr *Call, const Environment &CalleeEnv);
/// Returns true if and only if the environment is equivalent to `Other`, i.e
/// the two environments:
@@ -323,8 +324,51 @@ class Environment {
/// in the environment.
StorageLocation *getThisPointeeStorageLocation() const;
- /// Returns the storage location of the return value or null, if unset.
- StorageLocation *getReturnStorageLocation() const;
+ /// Returns the return value of the current function. This can be null if:
+ /// - The function has a void return type
+ /// - No return value could be determined for the function, for example
+ /// because it calls a function without a body.
+ ///
+ /// Requirements:
+ /// The current function must have a non-reference return type.
+ Value *getReturnValue() const {
+ assert(getCurrentFunc() != nullptr &&
+ !getCurrentFunc()->getReturnType()->isReferenceType());
+ return ReturnVal;
+ }
+
+ /// Returns the storage location for the reference returned by the current
+ /// function. This can be null if function doesn't return a single consistent
+ /// reference.
+ ///
+ /// Requirements:
+ /// The current function must have a reference return type.
+ StorageLocation *getReturnStorageLocation() const {
+ assert(getCurrentFunc() != nullptr &&
+ getCurrentFunc()->getReturnType()->isReferenceType());
+ return ReturnLoc;
+ }
+
+ /// Sets the return value of the current function.
+ ///
+ /// Requirements:
+ /// The current function must have a non-reference return type.
+ void setReturnValue(Value *Val) {
+ assert(getCurrentFunc() != nullptr &&
+ !getCurrentFunc()->getReturnType()->isReferenceType());
+ ReturnVal = Val;
+ }
+
+ /// Sets the storage location for the reference returned by the current
+ /// function.
+ ///
+ /// Requirements:
+ /// The current function must have a reference return type.
+ void setReturnStorageLocation(StorageLocation *Loc) {
+ assert(getCurrentFunc() != nullptr &&
+ getCurrentFunc()->getReturnType()->isReferenceType());
+ ReturnLoc = Loc;
+ }
/// Returns a pointer value that represents a null pointer. Calls with
/// `PointeeType` that are canonically equivalent will return the same result.
@@ -472,6 +516,12 @@ class Environment {
/// returns null.
const DeclContext *getDeclCtx() const { return CallStack.back(); }
+ /// Returns the function currently being analyzed, or null if the code being
+ /// analyzed isn't part of a function.
+ const FunctionDecl *getCurrentFunc() const {
+ return dyn_cast<FunctionDecl>(getDeclCtx());
+ }
+
/// Returns whether this `Environment` can be extended to analyze the given
/// `Callee` (i.e. if `pushCall` can be used), with recursion disallowed and a
/// given `MaxDepth`.
@@ -515,16 +565,18 @@ class Environment {
// `DACtx` is not null and not owned by this object.
DataflowAnalysisContext *DACtx;
-
- // FIXME: move the fields `CallStack`, `ReturnLoc` and `ThisPointeeLoc` into a
- // separate call-context object, shared between environments in the same call.
+ // FIXME: move the fields `CallStack`, `ReturnVal`, `ReturnLoc` and
+ // `ThisPointeeLoc` into a separate call-context object, shared between
+ // environments in the same call.
// https://github.com/llvm/llvm-project/issues/59005
// `DeclContext` of the block being analysed if provided.
std::vector<const DeclContext *> CallStack;
- // In a properly initialized `Environment`, `ReturnLoc` should only be null if
- // its `DeclContext` could not be cast to a `FunctionDecl`.
+ // Value returned by the function (if it has non-reference return type).
+ Value *ReturnVal = nullptr;
+ // Storage location of the reference returned by the function (if it has
+ // reference return type).
StorageLocation *ReturnLoc = nullptr;
// The storage location of the `this` pointee. Should only be null if the
// function being analyzed is only a function and not a method.
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index aaa2f6f19eecb..ee1a78472586d 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -267,9 +267,10 @@ Environment::Environment(DataflowAnalysisContext &DACtx)
Environment::Environment(const Environment &Other)
: DACtx(Other.DACtx), CallStack(Other.CallStack),
- ReturnLoc(Other.ReturnLoc), ThisPointeeLoc(Other.ThisPointeeLoc),
- DeclToLoc(Other.DeclToLoc), ExprToLoc(Other.ExprToLoc),
- LocToVal(Other.LocToVal), MemberLocToStruct(Other.MemberLocToStruct),
+ ReturnVal(Other.ReturnVal), ReturnLoc(Other.ReturnLoc),
+ ThisPointeeLoc(Other.ThisPointeeLoc), DeclToLoc(Other.DeclToLoc),
+ ExprToLoc(Other.ExprToLoc), LocToVal(Other.LocToVal),
+ MemberLocToStruct(Other.MemberLocToStruct),
FlowConditionToken(&DACtx->forkFlowCondition(*Other.FlowConditionToken)) {
}
@@ -302,9 +303,6 @@ Environment::Environment(DataflowAnalysisContext &DACtx,
createValue(ParamDecl->getType().getNonReferenceType()))
setValue(ParamLoc, *ParamVal);
}
-
- QualType ReturnType = FuncDecl->getReturnType();
- ReturnLoc = &createStorageLocation(ReturnType);
}
if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) {
@@ -331,9 +329,6 @@ bool Environment::canDescend(unsigned MaxDepth,
Environment Environment::pushCall(const CallExpr *Call) const {
Environment Env(*this);
- // FIXME: Support references here.
- Env.ReturnLoc = getStorageLocation(*Call, SkipPast::Reference);
-
if (const auto *MethodCall = dyn_cast<CXXMemberCallExpr>(Call)) {
if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) {
if (!isa<CXXThisExpr>(Arg))
@@ -352,10 +347,9 @@ Environment Environment::pushCall(const CallExpr *Call) const {
Environment Environment::pushCall(const CXXConstructExpr *Call) const {
Environment Env(*this);
- // FIXME: Support references here.
- Env.ReturnLoc = getStorageLocation(*Call, SkipPast::Reference);
-
- Env.ThisPointeeLoc = Env.ReturnLoc;
+ Env.ThisPointeeLoc = &Env.createStorageLocation(Call->getType());
+ if (Value *Val = Env.createValue(Call->getType()))
+ Env.setValue(*Env.ThisPointeeLoc, *Val);
Env.pushCallInternal(Call->getConstructor(),
llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
@@ -365,6 +359,12 @@ Environment Environment::pushCall(const CXXConstructExpr *Call) const {
void Environment::pushCallInternal(const FunctionDecl *FuncDecl,
ArrayRef<const Expr *> Args) {
+ // Canonicalize to the definition of the function. This ensures that we're
+ // putting arguments into the same `ParamVarDecl`s` that the callee will later
+ // be retrieving them from.
+ assert(FuncDecl->getDefinition() != nullptr);
+ FuncDecl = FuncDecl->getDefinition();
+
CallStack.push_back(FuncDecl);
initFieldsGlobalsAndFuncs(FuncDecl);
@@ -399,23 +399,46 @@ void Environment::pushCallInternal(const FunctionDecl *FuncDecl,
}
}
-void Environment::popCall(const Environment &CalleeEnv) {
+void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) {
// We ignore `DACtx` because it's already the same in both. We don't want the
- // callee's `DeclCtx`, `ReturnLoc` or `ThisPointeeLoc`. We don't bring back
- // `DeclToLoc` and `ExprToLoc` because we want to be able to later analyze the
- // same callee in a
diff erent context, and `setStorageLocation` requires there
- // to not already be a storage location assigned. Conceptually, these maps
- // capture information from the local scope, so when popping that scope, we do
- // not propagate the maps.
+ // callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or `ThisPointeeLoc`. We don't
+ // bring back `DeclToLoc` and `ExprToLoc` because we want to be able to later
+ // analyze the same callee in a
diff erent context, and `setStorageLocation`
+ // requires there to not already be a storage location assigned. Conceptually,
+ // these maps capture information from the local scope, so when popping that
+ // scope, we do not propagate the maps.
this->LocToVal = std::move(CalleeEnv.LocToVal);
this->MemberLocToStruct = std::move(CalleeEnv.MemberLocToStruct);
this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);
+
+ if (Call->isGLValue()) {
+ if (CalleeEnv.ReturnLoc != nullptr)
+ setStorageLocationStrict(*Call, *CalleeEnv.ReturnLoc);
+ } else if (!Call->getType()->isVoidType()) {
+ if (CalleeEnv.ReturnVal != nullptr)
+ setValueStrict(*Call, *CalleeEnv.ReturnVal);
+ }
+}
+
+void Environment::popCall(const CXXConstructExpr *Call,
+ const Environment &CalleeEnv) {
+ // See also comment in `popCall(const CallExpr *, const Environment &)` above.
+ this->LocToVal = std::move(CalleeEnv.LocToVal);
+ this->MemberLocToStruct = std::move(CalleeEnv.MemberLocToStruct);
+ this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);
+
+ if (Value *Val = CalleeEnv.getValue(*CalleeEnv.ThisPointeeLoc)) {
+ setValueStrict(*Call, *Val);
+ }
}
bool Environment::equivalentTo(const Environment &Other,
Environment::ValueModel &Model) const {
assert(DACtx == Other.DACtx);
+ if (ReturnVal != Other.ReturnVal)
+ return false;
+
if (ReturnLoc != Other.ReturnLoc)
return false;
@@ -453,6 +476,7 @@ bool Environment::equivalentTo(const Environment &Other,
LatticeJoinEffect Environment::widen(const Environment &PrevEnv,
Environment::ValueModel &Model) {
assert(DACtx == PrevEnv.DACtx);
+ assert(ReturnVal == PrevEnv.ReturnVal);
assert(ReturnLoc == PrevEnv.ReturnLoc);
assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc);
assert(CallStack == PrevEnv.CallStack);
@@ -514,7 +538,6 @@ LatticeJoinEffect Environment::widen(const Environment &PrevEnv,
LatticeJoinEffect Environment::join(const Environment &Other,
Environment::ValueModel &Model) {
assert(DACtx == Other.DACtx);
- assert(ReturnLoc == Other.ReturnLoc);
assert(ThisPointeeLoc == Other.ThisPointeeLoc);
assert(CallStack == Other.CallStack);
@@ -523,9 +546,38 @@ LatticeJoinEffect Environment::join(const Environment &Other,
Environment JoinedEnv(*DACtx);
JoinedEnv.CallStack = CallStack;
- JoinedEnv.ReturnLoc = ReturnLoc;
JoinedEnv.ThisPointeeLoc = ThisPointeeLoc;
+ if (ReturnVal == nullptr || Other.ReturnVal == nullptr) {
+ // `ReturnVal` might not always get set -- for example if we have a return
+ // statement of the form `return some_other_func()` and we decide not to
+ // analyze `some_other_func()`.
+ // In this case, we can't say anything about the joined return value -- we
+ // don't simply want to propagate the return value that we do have, because
+ // it might not be the correct one.
+ // This occurs for example in the test `ContextSensitiveMutualRecursion`.
+ JoinedEnv.ReturnVal = nullptr;
+ } else if (areEquivalentValues(*ReturnVal, *Other.ReturnVal)) {
+ JoinedEnv.ReturnVal = ReturnVal;
+ } else {
+ assert(!CallStack.empty());
+ // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this
+ // cast.
+ auto *Func = dyn_cast<FunctionDecl>(CallStack.back());
+ assert(Func != nullptr);
+ if (Value *MergedVal =
+ mergeDistinctValues(Func->getReturnType(), *ReturnVal, *this,
+ *Other.ReturnVal, Other, JoinedEnv, Model)) {
+ JoinedEnv.ReturnVal = MergedVal;
+ Effect = LatticeJoinEffect::Changed;
+ }
+ }
+
+ if (ReturnLoc == Other.ReturnLoc)
+ JoinedEnv.ReturnLoc = ReturnLoc;
+ else
+ JoinedEnv.ReturnLoc = nullptr;
+
// FIXME: Once we're able to remove declarations from `DeclToLoc` when their
// lifetime ends, add an assertion that there aren't any entries in
// `DeclToLoc` and `Other.DeclToLoc` that map the same declaration to
@@ -659,10 +711,6 @@ StorageLocation *Environment::getThisPointeeStorageLocation() const {
return ThisPointeeLoc;
}
-StorageLocation *Environment::getReturnStorageLocation() const {
- return ReturnLoc;
-}
-
PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) {
return DACtx->getOrCreateNullPointerValue(PointeeType);
}
diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
index 9658f5bcd6a0e..a67f9ceed82e1 100644
--- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
@@ -503,22 +503,21 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
if (Ret == nullptr)
return;
- auto *Val = Env.getValue(*Ret, SkipPast::None);
- if (Val == nullptr)
- return;
-
- // FIXME: Support reference-type returns.
- if (Val->getKind() == Value::Kind::Reference)
- return;
+ if (Ret->isPRValue()) {
+ auto *Val = Env.getValueStrict(*Ret);
+ if (Val == nullptr)
+ return;
- auto *Loc = Env.getReturnStorageLocation();
- assert(Loc != nullptr);
- // FIXME: Support reference-type returns.
- if (Loc->getType()->isReferenceType())
- return;
+ // FIXME: Model NRVO.
+ Env.setReturnValue(Val);
+ } else {
+ auto *Loc = Env.getStorageLocationStrict(*Ret);
+ if (Loc == nullptr)
+ return;
- // FIXME: Model NRVO.
- Env.setValue(*Loc, *Val);
+ // FIXME: Model NRVO.
+ Env.setReturnStorageLocation(Loc);
+ }
}
void VisitMemberExpr(const MemberExpr *S) {
@@ -857,13 +856,6 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
auto ExitBlock = CFCtx->getCFG().getExit().getBlockID();
- if (const auto *NonConstructExpr = dyn_cast<CallExpr>(S)) {
- // Note that it is important for the storage location of `S` to be set
- // before `pushCall`, because the latter uses it to set the storage
- // location for `return`.
- auto &ReturnLoc = Env.createStorageLocation(*S);
- Env.setStorageLocation(*S, ReturnLoc);
- }
auto CalleeEnv = Env.pushCall(S);
// FIXME: Use the same analysis as the caller for the callee. Note,
@@ -882,7 +874,7 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
auto ExitState = (*BlockToOutputState)[ExitBlock];
assert(ExitState);
- Env.popCall(ExitState->Env);
+ Env.popCall(S, ExitState->Env);
}
const StmtToEnvMap &StmtToEnv;
diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
index 7c2a7014354aa..726f2b7bbdcc6 100644
--- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
@@ -32,7 +32,9 @@ namespace {
using namespace clang;
using namespace dataflow;
using namespace test;
+using ::testing::Eq;
using ::testing::IsNull;
+using ::testing::Ne;
using ::testing::NotNull;
using ::testing::UnorderedElementsAre;
@@ -4239,13 +4241,33 @@ TEST(TransferTest, ContextSensitiveOptionDisabled) {
{BuiltinOptions{/*.ContextSensitiveOpts=*/std::nullopt}});
}
+TEST(TransferTest, ContextSensitiveReturnReference) {
+ std::string Code = R"(
+ class S {};
+ S& target(bool b, S &s) {
+ return s;
+ // [[p]]
+ }
+ )";
+ runDataflow(
+ Code,
+ [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+ ASTContext &ASTCtx) {
+ const Environment &Env = getEnvironmentAtAnnotation(Results, "p");
+
+ const ValueDecl *SDecl = findValueDecl(ASTCtx, "s");
+ ASSERT_THAT(SDecl, NotNull());
+
+ auto *SLoc = Env.getStorageLocation(*SDecl);
+ ASSERT_THAT(SLoc, NotNull());
+
+ ASSERT_THAT(Env.getReturnStorageLocation(), Eq(SLoc));
+ },
+ {BuiltinOptions{ContextSensitiveOptions{}}});
+}
+
// This test is a regression test, based on a real crash.
-TEST(TransferTest, ContextSensitiveReturnReferenceFromNonReferenceLvalue) {
- // This code exercises an unusual code path. If we return an lvalue directly,
- // the code will catch that it's an l-value based on the `Value`'s kind. If we
- // pass through a dummy function, the framework won't populate a value at
- // all. In contrast, this code results in a (fresh) value, but it is not
- // `ReferenceValue`. This test verifies that we catch this case as well.
+TEST(TransferTest, ContextSensitiveReturnReferenceWithConditionalOperator) {
std::string Code = R"(
class S {};
S& target(bool b, S &s) {
@@ -4260,10 +4282,72 @@ TEST(TransferTest, ContextSensitiveReturnReferenceFromNonReferenceLvalue) {
ASSERT_THAT(Results.keys(), UnorderedElementsAre("p"));
const Environment &Env = getEnvironmentAtAnnotation(Results, "p");
+ const ValueDecl *SDecl = findValueDecl(ASTCtx, "s");
+ ASSERT_THAT(SDecl, NotNull());
+
+ auto *SLoc = Env.getStorageLocation(*SDecl);
+ ASSERT_THAT(SLoc, NotNull());
+ EXPECT_THAT(Env.getValue(*SLoc), NotNull());
+
auto *Loc = Env.getReturnStorageLocation();
ASSERT_THAT(Loc, NotNull());
+ EXPECT_THAT(Env.getValue(*Loc), NotNull());
+
+ // TODO: We would really like to make this stronger assertion, but that
+ // doesn't work because we don't propagate values correctly through
+ // the conditional operator yet.
+ // ASSERT_THAT(Loc, Eq(SLoc));
+ },
+ {BuiltinOptions{ContextSensitiveOptions{}}});
+}
+
+TEST(TransferTest, ContextSensitiveReturnOneOfTwoReferences) {
+ std::string Code = R"(
+ class S {};
+ S &callee(bool b, S &s1_parm, S &s2_parm) {
+ if (b)
+ return s1_parm;
+ else
+ return s2_parm;
+ }
+ void target(bool b) {
+ S s1;
+ S s2;
+ S &return_s1 = s1;
+ S &return_s2 = s2;
+ S &return_dont_know = callee(b, s1, s2);
+ // [[p]]
+ }
+ )";
+ runDataflow(
+ Code,
+ [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+ ASTContext &ASTCtx) {
+ const Environment &Env = getEnvironmentAtAnnotation(Results, "p");
- EXPECT_THAT(Env.getValue(*Loc), IsNull());
+ const ValueDecl *S1 = findValueDecl(ASTCtx, "s1");
+ ASSERT_THAT(S1, NotNull());
+ const ValueDecl *S2 = findValueDecl(ASTCtx, "s2");
+ ASSERT_THAT(S2, NotNull());
+ const ValueDecl *ReturnS1 = findValueDecl(ASTCtx, "return_s1");
+ ASSERT_THAT(ReturnS1, NotNull());
+ const ValueDecl *ReturnS2 = findValueDecl(ASTCtx, "return_s2");
+ ASSERT_THAT(ReturnS2, NotNull());
+ const ValueDecl *ReturnDontKnow =
+ findValueDecl(ASTCtx, "return_dont_know");
+ ASSERT_THAT(ReturnDontKnow, NotNull());
+
+ StorageLocation *S1Loc = Env.getStorageLocation(*S1);
+ StorageLocation *S2Loc = Env.getStorageLocation(*S2);
+
+ EXPECT_THAT(Env.getStorageLocation(*ReturnS1), Eq(S1Loc));
+ EXPECT_THAT(Env.getStorageLocation(*ReturnS2), Eq(S2Loc));
+
+ // In the case where we don't have a consistent storage location for
+ // the return value, the framework creates a new storage location, which
+ // should be
diff erent from the storage locations of `s1` and `s2`.
+ EXPECT_THAT(Env.getStorageLocation(*ReturnDontKnow), Ne(S1Loc));
+ EXPECT_THAT(Env.getStorageLocation(*ReturnDontKnow), Ne(S2Loc));
},
{BuiltinOptions{ContextSensitiveOptions{}}});
}
More information about the cfe-commits
mailing list