[clang] [clang][dataflow] Propagate locations from result objects to initializers. (PR #87320)

via cfe-commits cfe-commits at lists.llvm.org
Tue Apr 2 01:01:09 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang

@llvm/pr-subscribers-clang-analysis

Author: None (martinboehme)

<details>
<summary>Changes</summary>

Previously, we were propagating storage locations the other way around, i.e.
from initializers to result objects, using `RecordValue::getLoc()`. This gave
the wrong behavior in some cases -- see the newly added or fixed tests in this
patch.

In addition, this patch now unblocks removing the `RecordValue` class entirely,
as we no longer need `RecordValue::getLoc()`.

With this patch, the test `TransferTest.DifferentReferenceLocInJoin` started to
fail because the framework now always uses the same storge location for a
`MaterializeTemporaryExpr`, meaning that the code under test no longer set up
the desired state where a variable of reference type is mapped to two different
storage locations in environments being joined. Rather than trying to modify
this test to set up the test condition again, I have chosen to replace the test
with an equivalent test in DataflowEnvironmentTest.cpp that sets up the test
condition directly; because this test is more direct, it will also be less
brittle in the face of future changes.


---

Patch is 52.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/87320.diff


6 Files Affected:

- (modified) clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h (+44-20) 
- (modified) clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp (+304-98) 
- (modified) clang/lib/Analysis/FlowSensitive/Transfer.cpp (+73-99) 
- (modified) clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp (+3-10) 
- (modified) clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp (+43) 
- (modified) clang/unittests/Analysis/FlowSensitive/TransferTest.cpp (+115-57) 


``````````diff
diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
index c30bccd06674a4..dc3a9c239552be 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -30,6 +30,7 @@
 #include "llvm/ADT/MapVector.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
+#include <memory>
 #include <type_traits>
 #include <utility>
 
@@ -330,17 +331,6 @@ class Environment {
   /// location of the result object to pass in `this`, even though prvalues are
   /// otherwise not associated with storage locations.
   ///
-  /// FIXME: Currently, this simply returns a stable storage location for `E`,
-  /// but this doesn't do the right thing in scenarios like the following:
-  /// ```
-  /// MyClass c = some_condition()? MyClass(foo) : MyClass(bar);
-  /// ```
-  /// Here, `MyClass(foo)` and `MyClass(bar)` will have two different storage
-  /// locations, when in fact their storage locations should be the same.
-  /// Eventually, we want to propagate storage locations from result objects
-  /// down to the prvalues that initialize them, similar to the way that this is
-  /// done in Clang's CodeGen.
-  ///
   /// Requirements:
   ///  `E` must be a prvalue of record type.
   RecordStorageLocation &
@@ -448,7 +438,13 @@ class Environment {
   /// Initializes the fields (including synthetic fields) of `Loc` with values,
   /// unless values of the field type are not supported or we hit one of the
   /// limits at which we stop producing values.
-  void initializeFieldsWithValues(RecordStorageLocation &Loc);
+  /// If `Type` is provided, initializes only those fields that are modeled for
+  /// `Type`; this is intended for use in cases where `Loc` is a derived type
+  /// and we only want to initialize the fields of a base type.
+  void initializeFieldsWithValues(RecordStorageLocation &Loc, QualType Type);
+  void initializeFieldsWithValues(RecordStorageLocation &Loc) {
+    initializeFieldsWithValues(Loc, Loc.getType());
+  }
 
   /// Assigns `Val` as the value of `Loc` in the environment.
   void setValue(const StorageLocation &Loc, Value &Val);
@@ -639,6 +635,9 @@ class Environment {
   LLVM_DUMP_METHOD void dump(raw_ostream &OS) const;
 
 private:
+  using PrValueToResultObject =
+      llvm::DenseMap<const Expr *, RecordStorageLocation *>;
+
   // The copy-constructor is for use in fork() only.
   Environment(const Environment &) = default;
 
@@ -668,8 +667,10 @@ class Environment {
   /// Initializes the fields (including synthetic fields) of `Loc` with values,
   /// unless values of the field type are not supported or we hit one of the
   /// limits at which we stop producing values (controlled by `Visited`,
-  /// `Depth`, and `CreatedValuesCount`).
-  void initializeFieldsWithValues(RecordStorageLocation &Loc,
+  /// `Depth`, and `CreatedValuesCount`). If `Type` is different from
+  /// `Loc.getType()`, initializes only those fields that are modeled for
+  /// `Type`.
+  void initializeFieldsWithValues(RecordStorageLocation &Loc, QualType Type,
                                   llvm::DenseSet<QualType> &Visited, int Depth,
                                   int &CreatedValuesCount);
 
@@ -688,22 +689,45 @@ class Environment {
   /// and functions referenced in `FuncDecl`. `FuncDecl` must have a body.
   void initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl);
 
+  static PrValueToResultObject
+  buildResultObjectMap(DataflowAnalysisContext *DACtx,
+                       const FunctionDecl *FuncDecl,
+                       RecordStorageLocation *ThisPointeeLoc,
+                       RecordStorageLocation *LocForRecordReturnVal);
+
   // `DACtx` is not null and not owned by this object.
   DataflowAnalysisContext *DACtx;
 
-  // FIXME: move the fields `CallStack`, `ReturnVal`, `ReturnLoc` and
-  // `ThisPointeeLoc` into a separate call-context object, shared between
-  // environments in the same call.
+  // FIXME: move the fields `CallStack`, `ResultObjectMap`, `ReturnVal`,
+  // `ReturnLoc` and `ThisPointeeLoc` into a separate call-context object,
+  // shared between environments in the same call.
   // https://github.com/llvm/llvm-project/issues/59005
 
   // `DeclContext` of the block being analysed if provided.
   std::vector<const DeclContext *> CallStack;
 
-  // Value returned by the function (if it has non-reference return type).
+  // Maps from prvalues of record type to their result objects. Shared between
+  // all environments for the same function.
+  // FIXME: It's somewhat unsatisfactory that we have to use a `shared_ptr`
+  // here, though the cost is acceptable: The overhead of a `shared_ptr` is
+  // incurred when it is copied, and this happens only relatively rarely (when
+  // we fork the environment). The need for a `shared_ptr` will go away once we
+  // introduce a shared call-context object (see above).
+  std::shared_ptr<PrValueToResultObject> ResultObjectMap;
+
+  // The following three member variables handle various different types of
+  // return values.
+  // - If the return type is not a reference and not a record: Value returned
+  //   by the function.
   Value *ReturnVal = nullptr;
-  // Storage location of the reference returned by the function (if it has
-  // reference return type).
+  // - If the return type is a reference: Storage location of the reference
+  //   returned by the function.
   StorageLocation *ReturnLoc = nullptr;
+  // - If the return type is a record or the function being analyzed is a
+  //   constructor: Storage location into which the return value should be
+  //   constructed.
+  RecordStorageLocation *LocForRecordReturnVal = nullptr;
+
   // The storage location of the `this` pointee. Should only be null if the
   // function being analyzed is only a function and not a method.
   RecordStorageLocation *ThisPointeeLoc = nullptr;
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index f729d676dd0de8..67d5c444d52cb6 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -15,6 +15,7 @@
 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclCXX.h"
+#include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Type.h"
 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
 #include "clang/Analysis/FlowSensitive/Value.h"
@@ -353,6 +354,8 @@ getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields,
   for (auto *Child : S.children())
     if (Child != nullptr)
       getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs);
+  if (const auto *DefaultArg = dyn_cast<CXXDefaultArgExpr>(&S))
+    getFieldsGlobalsAndFuncs(*DefaultArg->getExpr(), Fields, Vars, Funcs);
   if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S))
     getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs);
 
@@ -385,6 +388,185 @@ getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields,
   }
 }
 
+namespace {
+
+// Visitor that builds a map from record prvalues to result objects.
+// This traverses the body of the function to be analyzed; for each result
+// object that it encounters, it propagates the storage location of the result
+// object to all record prvalues that can initialize it.
+class ResultObjectVisitor : public RecursiveASTVisitor<ResultObjectVisitor> {
+public:
+  // `ResultObjectMap` will be filled with a map from record prvalues to result
+  // object. If the function being analyzed returns a record by value,
+  // `LocForRecordReturnVal` is the location to which this record should be
+  // written; otherwise, it is null.
+  explicit ResultObjectVisitor(
+      llvm::DenseMap<const Expr *, RecordStorageLocation *> &ResultObjectMap,
+      RecordStorageLocation *LocForRecordReturnVal,
+      DataflowAnalysisContext &DACtx)
+      : ResultObjectMap(ResultObjectMap),
+        LocForRecordReturnVal(LocForRecordReturnVal), DACtx(DACtx) {}
+
+  bool shouldVisitImplicitCode() { return true; }
+
+  bool shouldVisitLambdaBody() const { return false; }
+
+  // Traverse all member and base initializers of `Ctor`. This function is not
+  // called by `RecursiveASTVisitor`; it should be called manually if we are
+  // analyzing a constructor. `ThisPointeeLoc` is the storage location that
+  // `this` points to.
+  void TraverseConstructorInits(const CXXConstructorDecl *Ctor,
+                                RecordStorageLocation *ThisPointeeLoc) {
+    assert(ThisPointeeLoc != nullptr);
+    for (const CXXCtorInitializer *Init : Ctor->inits()) {
+      Expr *InitExpr = Init->getInit();
+      if (FieldDecl *Field = Init->getMember();
+          Field != nullptr && Field->getType()->isRecordType()) {
+        PropagateResultObject(InitExpr, cast<RecordStorageLocation>(
+                                            ThisPointeeLoc->getChild(*Field)));
+      } else if (Init->getBaseClass()) {
+        PropagateResultObject(InitExpr, ThisPointeeLoc);
+      }
+
+      // Ensure that any result objects within `InitExpr` (e.g. temporaries)
+      // are also propagated to the prvalues that initialize them.
+      TraverseStmt(InitExpr);
+
+      // If this is a `CXXDefaultInitExpr`, also propagate any result objects
+      // within the default expression.
+      if (auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(InitExpr))
+        TraverseStmt(DefaultInit->getExpr());
+    }
+  }
+
+  bool TraverseBindingDecl(BindingDecl *BD) {
+    // `RecursiveASTVisitor` doesn't traverse holding variables for
+    // `BindingDecl`s by itself, so we need to tell it to.
+    if (VarDecl *HoldingVar = BD->getHoldingVar())
+      TraverseDecl(HoldingVar);
+    return RecursiveASTVisitor<ResultObjectVisitor>::TraverseBindingDecl(BD);
+  }
+
+  bool VisitVarDecl(VarDecl *VD) {
+    if (VD->getType()->isRecordType() && VD->hasInit())
+      PropagateResultObject(
+          VD->getInit(),
+          &cast<RecordStorageLocation>(DACtx.getStableStorageLocation(*VD)));
+    return true;
+  }
+
+  bool VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *MTE) {
+    if (MTE->getType()->isRecordType())
+      PropagateResultObject(
+          MTE->getSubExpr(),
+          &cast<RecordStorageLocation>(DACtx.getStableStorageLocation(*MTE)));
+    return true;
+  }
+
+  bool VisitReturnStmt(ReturnStmt *Return) {
+    Expr *RetValue = Return->getRetValue();
+    if (RetValue != nullptr && RetValue->getType()->isRecordType() &&
+        RetValue->isPRValue())
+      PropagateResultObject(RetValue, LocForRecordReturnVal);
+    return true;
+  }
+
+  bool VisitExpr(Expr *E) {
+    // Clang's AST can have record-type prvalues without a result object -- for
+    // example as full-expressions contained in a compound statement or as
+    // arguments of call expressions. We notice this if we get here and a
+    // storage location has not yet been associated with `E`. In this case,
+    // treat this as if it was a `MaterializeTemporaryExpr`.
+    if (E->isPRValue() && E->getType()->isRecordType() &&
+        !ResultObjectMap.contains(E))
+      PropagateResultObject(
+          E, &cast<RecordStorageLocation>(DACtx.getStableStorageLocation(*E)));
+    return true;
+  }
+
+  // Assigns `Loc` as the result object location of `E`, then propagates the
+  // location to all lower-level prvalues that initialize the same object as
+  // `E` (or one of its base classes or member variables).
+  void PropagateResultObject(Expr *E, RecordStorageLocation *Loc) {
+    if (!E->isPRValue() || !E->getType()->isRecordType()) {
+      assert(false);
+      // Ensure we don't propagate the result object if we hit this in a
+      // release build.
+      return;
+    }
+
+    ResultObjectMap[E] = Loc;
+
+    // The following AST node kinds are "original initializers": They are the
+    // lowest-level AST node that initializes a given object, and nothing
+    // below them can initialize the same object (or part of it).
+    if (isa<CXXConstructExpr>(E) || isa<CallExpr>(E) || isa<LambdaExpr>(E) ||
+        isa<CXXDefaultArgExpr>(E) || isa<CXXDefaultInitExpr>(E) ||
+        isa<CXXStdInitializerListExpr>(E)) {
+      return;
+    }
+
+    if (auto *InitList = dyn_cast<InitListExpr>(E)) {
+      if (!InitList->isSemanticForm())
+        return;
+      if (InitList->isTransparent()) {
+        for (Expr *Init : InitList->inits())
+          PropagateResultObject(Init, Loc);
+        return;
+      }
+
+      RecordInitListHelper InitListHelper(InitList);
+
+      for (auto [Base, Init] : InitListHelper.base_inits()) {
+        assert(Base->getType().getCanonicalType() ==
+               Init->getType().getCanonicalType());
+
+        // Storage location for the base class is the same as that of the
+        // derived class because we "flatten" the object hierarchy and put all
+        // fields in `RecordStorageLocation` of the derived class.
+        PropagateResultObject(Init, Loc);
+      }
+
+      for (auto [Field, Init] : InitListHelper.field_inits()) {
+        // Fields of non-record type are handled in
+        // `TransferVisitor::VisitInitListExpr()`.
+        if (!Field->getType()->isRecordType())
+          continue;
+        PropagateResultObject(
+            Init, cast<RecordStorageLocation>(Loc->getChild(*Field)));
+      }
+      return;
+    }
+
+    if (auto *Op = dyn_cast<BinaryOperator>(E); Op && Op->isCommaOp()) {
+      PropagateResultObject(Op->getRHS(), Loc);
+      return;
+    }
+
+    if (auto *Cond = dyn_cast<AbstractConditionalOperator>(E)) {
+      PropagateResultObject(Cond->getTrueExpr(), Loc);
+      PropagateResultObject(Cond->getFalseExpr(), Loc);
+      return;
+    }
+
+    // All other expression nodes that propagate a record prvalue should have
+    // exactly one child.
+    llvm::SmallVector<Stmt *> Children(E->child_begin(), E->child_end());
+    if (Children.size() != 1)
+      E->dump();
+    assert(Children.size() == 1);
+    for (Stmt *S : Children)
+      PropagateResultObject(cast<Expr>(S), Loc);
+  }
+
+private:
+  llvm::DenseMap<const Expr *, RecordStorageLocation *> &ResultObjectMap;
+  RecordStorageLocation *LocForRecordReturnVal;
+  DataflowAnalysisContext &DACtx;
+};
+
+} // namespace
+
 Environment::Environment(DataflowAnalysisContext &DACtx)
     : DACtx(&DACtx),
       FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {}
@@ -400,17 +582,23 @@ void Environment::initialize() {
   if (DeclCtx == nullptr)
     return;
 
-  if (const auto *FuncDecl = dyn_cast<FunctionDecl>(DeclCtx)) {
-    assert(FuncDecl->doesThisDeclarationHaveABody());
+  const auto *FuncDecl = dyn_cast<FunctionDecl>(DeclCtx);
+  if (FuncDecl == nullptr)
+    return;
+
+  assert(FuncDecl->doesThisDeclarationHaveABody());
 
-    initFieldsGlobalsAndFuncs(FuncDecl);
+  initFieldsGlobalsAndFuncs(FuncDecl);
 
-    for (const auto *ParamDecl : FuncDecl->parameters()) {
-      assert(ParamDecl != nullptr);
-      setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr));
-    }
+  for (const auto *ParamDecl : FuncDecl->parameters()) {
+    assert(ParamDecl != nullptr);
+    setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr));
   }
 
+  if (FuncDecl->getReturnType()->isRecordType())
+    LocForRecordReturnVal = &cast<RecordStorageLocation>(
+        createStorageLocation(FuncDecl->getReturnType()));
+
   if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(DeclCtx)) {
     auto *Parent = MethodDecl->getParent();
     assert(Parent != nullptr);
@@ -443,6 +631,12 @@ void Environment::initialize() {
         initializeFieldsWithValues(ThisLoc);
     }
   }
+
+  // We do this below the handling of `CXXMethodDecl` above so that we can
+  // be sure that the storage location for `this` has been set.
+  ResultObjectMap = std::make_shared<PrValueToResultObject>(
+      buildResultObjectMap(DACtx, FuncDecl, getThisPointeeStorageLocation(),
+                           LocForRecordReturnVal));
 }
 
 // FIXME: Add support for resetting globals after function calls to enable
@@ -483,13 +677,18 @@ void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) {
     if (getStorageLocation(*D) != nullptr)
       continue;
 
-    setStorageLocation(*D, createObject(*D));
+    // We don't run transfer functions on the initializers of global variables,
+    // so they won't be associated with a value or storage location. We
+    // therefore intentionally don't pass an initializer to `createObject()`;
+    // in particular, this ensures that `createObject()` will initialize the
+    // fields of record-type variables with values.
+    setStorageLocation(*D, createObject(*D, nullptr));
   }
 
   for (const FunctionDecl *FD : Funcs) {
     if (getStorageLocation(*FD) != nullptr)
       continue;
-    auto &Loc = createStorageLocation(FD->getType());
+    auto &Loc = createStorageLocation(*FD);
     setStorageLocation(*FD, Loc);
   }
 }
@@ -518,6 +717,9 @@ Environment Environment::pushCall(const CallExpr *Call) const {
     }
   }
 
+  if (Call->getType()->isRecordType() && Call->isPRValue())
+    Env.LocForRecordReturnVal = &Env.getResultObjectLocation(*Call);
+
   Env.pushCallInternal(Call->getDirectCallee(),
                        llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
 
@@ -528,6 +730,7 @@ Environment Environment::pushCall(const CXXConstructExpr *Call) const {
   Environment Env(*this);
 
   Env.ThisPointeeLoc = &Env.getResultObjectLocation(*Call);
+  Env.LocForRecordReturnVal = &Env.getResultObjectLocation(*Call);
 
   Env.pushCallInternal(Call->getConstructor(),
                        llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
@@ -556,6 +759,10 @@ void Environment::pushCallInternal(const FunctionDecl *FuncDecl,
     const VarDecl *Param = *ParamIt;
     setStorageLocation(*Param, createObject(*Param, Args[ArgIndex]));
   }
+
+  ResultObjectMap = std::make_shared<PrValueToResultObject>(
+      buildResultObjectMap(DACtx, FuncDecl, getThisPointeeStorageLocation(),
+                           LocForRecordReturnVal));
 }
 
 void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) {
@@ -599,6 +806,9 @@ bool Environment::equivalentTo(const Environment &Other,
   if (ReturnLoc != Other.ReturnLoc)
     return false;
 
+  if (LocForRecordReturnVal != Other.LocForRecordReturnVal)
+    return false;
+
   if (ThisPointeeLoc != Other.ThisPointeeLoc)
     return false;
 
@@ -622,8 +832,10 @@ LatticeJoinEffect Environment::widen(const Environment &PrevEnv,
   assert(DACtx == PrevEnv.DACtx);
   assert(ReturnVal == PrevEnv.ReturnVal);
   assert(ReturnLoc == PrevEnv.ReturnLoc);
+  assert(LocForRecordReturnVal == PrevEnv.LocForRecordReturnVal);
   assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc);
   assert(CallStack == PrevEnv.CallStack);
+  assert(ResultObjectMap == PrevEnv.ResultObjectMap);
 
   auto Effect = LatticeJoinEffect::Unchanged;
 
@@ -655,12 +867,16 @@ Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
                               Environment::ValueModel &Model,
                               ExprJoinBehavior ExprBehavior) {
   assert(EnvA.DACtx == EnvB.DACtx);
+  assert(EnvA.LocForRecordReturnVal == EnvB.LocForRecordReturnVal);
   assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc);
   assert(EnvA.CallStack == EnvB.CallStack);
+  assert(EnvA.ResultObjectMap == EnvB.ResultObjectMap);
 
   Environment JoinedEnv(*EnvA.DACtx);
 
   JoinedEnv.CallStack = EnvA.CallStack;
+  JoinedEnv.ResultObjectMap = EnvA.ResultObjectMap;
+  JoinedEnv.LocForRecordReturnVal = EnvA.LocForRecordReturnVal;
   JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc;
 
   if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) {
@@ -729,6 +945,12 @@ StorageLocation &Environment::createStorageLocation(const Expr &E) {
 
 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
   assert(!DeclToLoc.contains(&D));
+  // The only kinds of declarations that may have a "variable" storage location
+  // are declarations of reference type and `BindingDecl`. For all other
+  // declaration, the storage location should be the stable storage location
+  // returned by `createStorageLocation()`.
+  assert(D.getType()->isReferenceType() || isa<BindingDecl>(D) ||
+         &Loc...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/87320


More information about the cfe-commits mailing list