[clang] [clang][dataflow] Fully support Environment construction for Stmt analysis. (PR #91616)

Samira Bazuzi via cfe-commits cfe-commits at lists.llvm.org
Fri May 10 08:07:43 PDT 2024


https://github.com/bazuzi updated https://github.com/llvm/llvm-project/pull/91616

>From b62a95001e32a0c1d63204950481eb500c9d0275 Mon Sep 17 00:00:00 2001
From: Samira Bazuzi <bazuzi at google.com>
Date: Thu, 9 May 2024 12:00:06 -0400
Subject: [PATCH 1/3] [clang][dataflow] Fully support Environment construction
 for Stmt analysis.

Assume in fewer places that the analysis is of a FunctionDecl, and initialize the Environment properly for Stmts.

Moves constructors for Environment to header to make it more obvious
that there are only minor differences between them and very little
initialization in the constructors.

Tested check-clang-tooling.
---
 .../FlowSensitive/DataflowEnvironment.h       | 133 ++++++++++++------
 .../FlowSensitive/DataflowEnvironment.cpp     | 113 +++++++--------
 .../TypeErasedDataflowAnalysis.cpp            |   2 +-
 .../FlowSensitive/DataflowEnvironmentTest.cpp |  33 +++++
 .../Analysis/FlowSensitive/TestingSupport.h   |   4 +-
 5 files changed, 176 insertions(+), 109 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
index cdf89c7def2c9..dfc3f6a35e4e0 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -19,6 +19,7 @@
 #include "clang/AST/DeclBase.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/Type.h"
+#include "clang/Analysis/FlowSensitive/ASTOps.h"
 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
 #include "clang/Analysis/FlowSensitive/Formula.h"
@@ -28,8 +29,10 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PointerUnion.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
+#include <cassert>
 #include <memory>
 #include <type_traits>
 #include <utility>
@@ -155,7 +158,9 @@ class Environment {
 
   /// Creates an environment that uses `DACtx` to store objects that encompass
   /// the state of a program.
-  explicit Environment(DataflowAnalysisContext &DACtx);
+  explicit Environment(DataflowAnalysisContext &DACtx)
+      : DACtx(&DACtx),
+        FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {}
 
   // Copy-constructor is private, Environments should not be copied. See fork().
   Environment &operator=(const Environment &Other) = delete;
@@ -164,23 +169,25 @@ class Environment {
   Environment &operator=(Environment &&Other) = default;
 
   /// Creates an environment that uses `DACtx` to store objects that encompass
-  /// the state of a program.
-  ///
-  /// If `DeclCtx` is a function, initializes the environment with symbolic
-  /// representations of the function parameters.
-  ///
-  /// If `DeclCtx` is a non-static member function, initializes the environment
-  /// with a symbolic representation of the `this` pointee.
-  Environment(DataflowAnalysisContext &DACtx, const DeclContext &DeclCtx);
+  /// the state of a program, with `FD` as the current analysis target.
+  Environment(DataflowAnalysisContext &DACtx, const FunctionDecl &FD)
+      : Environment(DACtx) {
+    TargetStack.push_back(&FD);
+  }
+
+  /// Creates an environment that uses `DACtx` to store objects that encompass
+  /// the state of a program, with `S` as the current analysis target.
+  Environment(DataflowAnalysisContext &DACtx, Stmt &S) : Environment(DACtx) {
+    TargetStack.push_back(&S);
+  }
 
   /// Assigns storage locations and values to all parameters, captures, global
-  /// variables, fields and functions referenced in the function currently being
-  /// analyzed.
+  /// variables, fields and functions referenced in the target currently
+  /// being analyzed.
   ///
-  /// Requirements:
-  ///
-  ///  The function must have a body, i.e.
-  ///  `FunctionDecl::doesThisDecalarationHaveABody()` must be true.
+  /// If the current analysis target is a non-static member function,
+  /// initializes the environment with a symbolic representation of the `this`
+  /// pointee.
   void initialize();
 
   /// Returns a new environment that is a copy of this one.
@@ -365,46 +372,51 @@ class Environment {
   RecordStorageLocation &
   getResultObjectLocation(const Expr &RecordPRValue) const;
 
-  /// Returns the return value of the current function. This can be null if:
+  /// Returns the return value of the function currently being analyzed.
+  /// This can be null if:
   /// - The function has a void return type
   /// - No return value could be determined for the function, for example
   ///   because it calls a function without a body.
   ///
   /// Requirements:
-  ///  The current function must have a non-reference return type.
+  ///  The current analysis target must be a function and must have a
+  ///  non-reference return type.
   Value *getReturnValue() const {
     assert(getCurrentFunc() != nullptr &&
            !getCurrentFunc()->getReturnType()->isReferenceType());
     return ReturnVal;
   }
 
-  /// Returns the storage location for the reference returned by the current
-  /// function. This can be null if function doesn't return a single consistent
-  /// reference.
+  /// Returns the storage location for the reference returned by the function
+  /// currently being analyzed. This can be null if the function doesn't return
+  /// a single consistent reference.
   ///
   /// Requirements:
-  ///  The current function must have a reference return type.
+  ///  The current analysis target must be a function and must have a reference
+  ///  return type.
   StorageLocation *getReturnStorageLocation() const {
     assert(getCurrentFunc() != nullptr &&
            getCurrentFunc()->getReturnType()->isReferenceType());
     return ReturnLoc;
   }
 
-  /// Sets the return value of the current function.
+  /// Sets the return value of the function currently being analyzed.
   ///
   /// Requirements:
-  ///  The current function must have a non-reference return type.
+  ///  The current analysis target must be a function and must have a
+  ///  non-reference return type.
   void setReturnValue(Value *Val) {
     assert(getCurrentFunc() != nullptr &&
            !getCurrentFunc()->getReturnType()->isReferenceType());
     ReturnVal = Val;
   }
 
-  /// Sets the storage location for the reference returned by the current
-  /// function.
+  /// Sets the storage location for the reference returned by the function
+  /// currently being analyzed.
   ///
   /// Requirements:
-  ///  The current function must have a reference return type.
+  ///  The current analysis target must be a function and must have a reference
+  ///  return type.
   void setReturnStorageLocation(StorageLocation *Loc) {
     assert(getCurrentFunc() != nullptr &&
            getCurrentFunc()->getReturnType()->isReferenceType());
@@ -641,23 +653,25 @@ class Environment {
   /// (or the flow condition is overly constraining) or if the solver times out.
   bool allows(const Formula &) const;
 
-  /// Returns the `DeclContext` of the block being analysed, if any. Otherwise,
-  /// returns null.
-  const DeclContext *getDeclCtx() const { return CallStack.back(); }
+  /// Returns the current analysis target.
+  llvm::PointerUnion<const FunctionDecl *, Stmt *> getCurrentTarget() const {
+    return TargetStack.back();
+  }
 
   /// Returns the function currently being analyzed, or null if the code being
   /// analyzed isn't part of a function.
   const FunctionDecl *getCurrentFunc() const {
-    return dyn_cast<FunctionDecl>(getDeclCtx());
+    return getCurrentTarget().dyn_cast<const FunctionDecl *>();
   }
 
-  /// Returns the size of the call stack.
-  size_t callStackSize() const { return CallStack.size(); }
+  /// Returns the size of the target stack. All but the first item in the stack
+  /// are expected to be function calls.
+  size_t targetStackSize() const { return TargetStack.size(); }
 
   /// Returns whether this `Environment` can be extended to analyze the given
   /// `Callee` (i.e. if `pushCall` can be used), with recursion disallowed and a
   /// given `MaxDepth`.
-  bool canDescend(unsigned MaxDepth, const DeclContext *Callee) const;
+  bool canDescend(unsigned MaxDepth, const FunctionDecl *Callee) const;
 
   /// Returns the `DataflowAnalysisContext` used by the environment.
   DataflowAnalysisContext &getDataflowAnalysisContext() const { return *DACtx; }
@@ -718,9 +732,39 @@ class Environment {
   void pushCallInternal(const FunctionDecl *FuncDecl,
                         ArrayRef<const Expr *> Args);
 
+  // FIXME: Add support for resetting globals after function calls to enable
+  // the implementation of sound analyses.
+
   /// Assigns storage locations and values to all global variables, fields
-  /// and functions referenced in `FuncDecl`. `FuncDecl` must have a body.
-  void initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl);
+  /// and functions referenced in `Source`.
+  template <typename FuncOrStmt>
+  void initFieldsGlobalsAndFuncs(const FuncOrStmt *Source) {
+
+    ReferencedDecls Referenced = getReferencedDecls(*Source);
+
+    // These have to be added before the lines that follow to ensure that
+    // `create*` work correctly for structs.
+    DACtx->addModeledFields(Referenced.Fields);
+
+    for (const VarDecl *D : Referenced.Globals) {
+      if (getStorageLocation(*D) != nullptr)
+        continue;
+
+      // We don't run transfer functions on the initializers of global
+      // variables, so they won't be associated with a value or storage
+      // location. We therefore intentionally don't pass an initializer to
+      // `createObject()`; in particular, this ensures that `createObject()`
+      // will initialize the fields of record-type variables with values.
+      setStorageLocation(*D, createObject(*D, nullptr));
+    }
+
+    for (const FunctionDecl *FD : Referenced.Functions) {
+      if (getStorageLocation(*FD) != nullptr)
+        continue;
+      auto &Loc = createStorageLocation(*FD);
+      setStorageLocation(*FD, Loc);
+    }
+  }
 
   static PrValueToResultObject
   buildResultObjectMap(DataflowAnalysisContext *DACtx,
@@ -728,19 +772,25 @@ class Environment {
                        RecordStorageLocation *ThisPointeeLoc,
                        RecordStorageLocation *LocForRecordReturnVal);
 
+  static PrValueToResultObject
+  buildResultObjectMap(DataflowAnalysisContext *DACtx, Stmt *S,
+                       RecordStorageLocation *ThisPointeeLoc,
+                       RecordStorageLocation *LocForRecordReturnVal);
+
   // `DACtx` is not null and not owned by this object.
   DataflowAnalysisContext *DACtx;
 
-  // FIXME: move the fields `CallStack`, `ResultObjectMap`, `ReturnVal`,
+  // FIXME: move the fields `TargetStack`, `ResultObjectMap`, `ReturnVal`,
   // `ReturnLoc` and `ThisPointeeLoc` into a separate call-context object,
   // shared between environments in the same call.
   // https://github.com/llvm/llvm-project/issues/59005
 
-  // `DeclContext` of the block being analysed if provided.
-  std::vector<const DeclContext *> CallStack;
+  // The stack of statements or functions being analyzed. Only the first item in
+  // the stack is expected to ever be a `Stmt *`.
+  std::vector<llvm::PointerUnion<const FunctionDecl *, Stmt *>> TargetStack;
 
   // Maps from prvalues of record type to their result objects. Shared between
-  // all environments for the same function.
+  // all environments for the same analysis target.
   // FIXME: It's somewhat unsatisfactory that we have to use a `shared_ptr`
   // here, though the cost is acceptable: The overhead of a `shared_ptr` is
   // incurred when it is copied, and this happens only relatively rarely (when
@@ -749,7 +799,7 @@ class Environment {
   std::shared_ptr<PrValueToResultObject> ResultObjectMap;
 
   // The following three member variables handle various different types of
-  // return values.
+  // return values when the current analysis target is a function.
   // - If the return type is not a reference and not a record: Value returned
   //   by the function.
   Value *ReturnVal = nullptr;
@@ -762,7 +812,8 @@ class Environment {
   RecordStorageLocation *LocForRecordReturnVal = nullptr;
 
   // The storage location of the `this` pointee. Should only be null if the
-  // function being analyzed is only a function and not a method.
+  // analysis target is not a function or is a function but not a
+  // method.
   RecordStorageLocation *ThisPointeeLoc = nullptr;
 
   // Maps from declarations and glvalue expression to storage locations that are
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index cb6c8b2ef1072..41bdeaf0375ef 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -16,17 +16,22 @@
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/Stmt.h"
 #include "clang/AST/Type.h"
 #include "clang/Analysis/FlowSensitive/ASTOps.h"
+#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
 #include "clang/Analysis/FlowSensitive/Value.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PointerUnion.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
 #include <cassert>
+#include <memory>
 #include <utility>
 
 #define DEBUG_TYPE "dataflow"
@@ -290,15 +295,15 @@ widenKeyToValueMap(const llvm::MapVector<Key, Value *> &CurMap,
 namespace {
 
 // Visitor that builds a map from record prvalues to result objects.
-// This traverses the body of the function to be analyzed; for each result
-// object that it encounters, it propagates the storage location of the result
-// object to all record prvalues that can initialize it.
+// This traverses the statement to be analyzed; for each result object that it
+// encounters, it propagates the storage location of the result object to all
+// record prvalues that can initialize it.
 class ResultObjectVisitor : public RecursiveASTVisitor<ResultObjectVisitor> {
 public:
   // `ResultObjectMap` will be filled with a map from record prvalues to result
-  // object. If the function being analyzed returns a record by value,
-  // `LocForRecordReturnVal` is the location to which this record should be
-  // written; otherwise, it is null.
+  // object. If the statement being analyzed is a function body and returns a
+  // record by value, `LocForRecordReturnVal` is the location to which this
+  // record should be written; otherwise, it is null.
   explicit ResultObjectVisitor(
       llvm::DenseMap<const Expr *, RecordStorageLocation *> &ResultObjectMap,
       RecordStorageLocation *LocForRecordReturnVal,
@@ -514,25 +519,20 @@ class ResultObjectVisitor : public RecursiveASTVisitor<ResultObjectVisitor> {
 
 } // namespace
 
-Environment::Environment(DataflowAnalysisContext &DACtx)
-    : DACtx(&DACtx),
-      FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {}
-
-Environment::Environment(DataflowAnalysisContext &DACtx,
-                         const DeclContext &DeclCtx)
-    : Environment(DACtx) {
-  CallStack.push_back(&DeclCtx);
-}
-
 void Environment::initialize() {
-  const DeclContext *DeclCtx = getDeclCtx();
-  if (DeclCtx == nullptr)
+  llvm::PointerUnion<const FunctionDecl *, Stmt *> Target = getCurrentTarget();
+  if (Target.isNull())
     return;
 
-  const auto *FuncDecl = dyn_cast<FunctionDecl>(DeclCtx);
-  if (FuncDecl == nullptr)
+  if (Stmt *S = Target.dyn_cast<Stmt *>()) {
+    initFieldsGlobalsAndFuncs(S);
+    ResultObjectMap =
+        std::make_shared<PrValueToResultObject>(buildResultObjectMap(
+            DACtx, S, getThisPointeeStorageLocation(), LocForRecordReturnVal));
     return;
+  }
 
+  const auto *FuncDecl = Target.get<const FunctionDecl *>();
   assert(FuncDecl->doesThisDeclarationHaveABody());
 
   initFieldsGlobalsAndFuncs(FuncDecl);
@@ -546,7 +546,7 @@ void Environment::initialize() {
     LocForRecordReturnVal = &cast<RecordStorageLocation>(
         createStorageLocation(FuncDecl->getReturnType()));
 
-  if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(DeclCtx)) {
+  if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(FuncDecl)) {
     auto *Parent = MethodDecl->getParent();
     assert(Parent != nullptr);
 
@@ -558,7 +558,7 @@ void Environment::initialize() {
           setStorageLocation(*VarDecl, createObject(*VarDecl, nullptr));
         } else if (Capture.capturesThis()) {
           const auto *SurroundingMethodDecl =
-              cast<CXXMethodDecl>(DeclCtx->getNonClosureAncestor());
+              cast<CXXMethodDecl>(FuncDecl->getNonClosureAncestor());
           QualType ThisPointeeType =
               SurroundingMethodDecl->getFunctionObjectParameterType();
           setThisPointeeStorageLocation(
@@ -585,37 +585,6 @@ void Environment::initialize() {
                            LocForRecordReturnVal));
 }
 
-// FIXME: Add support for resetting globals after function calls to enable
-// the implementation of sound analyses.
-void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) {
-  assert(FuncDecl->doesThisDeclarationHaveABody());
-
-  ReferencedDecls Referenced = getReferencedDecls(*FuncDecl);
-
-  // These have to be added before the lines that follow to ensure that
-  // `create*` work correctly for structs.
-  DACtx->addModeledFields(Referenced.Fields);
-
-  for (const VarDecl *D : Referenced.Globals) {
-    if (getStorageLocation(*D) != nullptr)
-      continue;
-
-    // We don't run transfer functions on the initializers of global variables,
-    // so they won't be associated with a value or storage location. We
-    // therefore intentionally don't pass an initializer to `createObject()`;
-    // in particular, this ensures that `createObject()` will initialize the
-    // fields of record-type variables with values.
-    setStorageLocation(*D, createObject(*D, nullptr));
-  }
-
-  for (const FunctionDecl *FD : Referenced.Functions) {
-    if (getStorageLocation(*FD) != nullptr)
-      continue;
-    auto &Loc = createStorageLocation(*FD);
-    setStorageLocation(*FD, Loc);
-  }
-}
-
 Environment Environment::fork() const {
   Environment Copy(*this);
   Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken);
@@ -623,8 +592,14 @@ Environment Environment::fork() const {
 }
 
 bool Environment::canDescend(unsigned MaxDepth,
-                             const DeclContext *Callee) const {
-  return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee);
+                             const FunctionDecl *Callee) const {
+  return TargetStack.size() <= MaxDepth &&
+         !std::any_of(
+             TargetStack.begin(), TargetStack.end(),
+             [Callee](const llvm::PointerUnion<const FunctionDecl *, Stmt *>
+                          &Target) {
+               return Callee == Target.dyn_cast<const FunctionDecl *>();
+             });
 }
 
 Environment Environment::pushCall(const CallExpr *Call) const {
@@ -669,7 +644,7 @@ void Environment::pushCallInternal(const FunctionDecl *FuncDecl,
   assert(FuncDecl->getDefinition() != nullptr);
   FuncDecl = FuncDecl->getDefinition();
 
-  CallStack.push_back(FuncDecl);
+  TargetStack.push_back(FuncDecl);
 
   initFieldsGlobalsAndFuncs(FuncDecl);
 
@@ -753,7 +728,7 @@ LatticeEffect Environment::widen(const Environment &PrevEnv,
   assert(ReturnLoc == PrevEnv.ReturnLoc);
   assert(LocForRecordReturnVal == PrevEnv.LocForRecordReturnVal);
   assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc);
-  assert(CallStack == PrevEnv.CallStack);
+  assert(TargetStack == PrevEnv.TargetStack);
   assert(ResultObjectMap == PrevEnv.ResultObjectMap);
 
   auto Effect = LatticeEffect::Unchanged;
@@ -788,22 +763,20 @@ Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
   assert(EnvA.DACtx == EnvB.DACtx);
   assert(EnvA.LocForRecordReturnVal == EnvB.LocForRecordReturnVal);
   assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc);
-  assert(EnvA.CallStack == EnvB.CallStack);
+  assert(EnvA.TargetStack == EnvB.TargetStack);
   assert(EnvA.ResultObjectMap == EnvB.ResultObjectMap);
 
   Environment JoinedEnv(*EnvA.DACtx);
 
-  JoinedEnv.CallStack = EnvA.CallStack;
+  JoinedEnv.TargetStack = EnvA.TargetStack;
   JoinedEnv.ResultObjectMap = EnvA.ResultObjectMap;
   JoinedEnv.LocForRecordReturnVal = EnvA.LocForRecordReturnVal;
   JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc;
 
-  if (EnvA.CallStack.empty()) {
+  if (EnvA.TargetStack.empty()) {
     JoinedEnv.ReturnVal = nullptr;
   } else {
-    // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this
-    // cast.
-    auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back());
+    auto *Func = EnvA.getCurrentFunc();
     assert(Func != nullptr);
     JoinedEnv.ReturnVal =
         joinValues(Func->getReturnType(), EnvA.ReturnVal, EnvA, EnvB.ReturnVal,
@@ -1229,16 +1202,26 @@ Environment::PrValueToResultObject Environment::buildResultObjectMap(
     RecordStorageLocation *LocForRecordReturnVal) {
   assert(FuncDecl->doesThisDeclarationHaveABody());
 
-  PrValueToResultObject Map;
+  PrValueToResultObject Map = buildResultObjectMap(
+      DACtx, FuncDecl->getBody(), ThisPointeeLoc, LocForRecordReturnVal);
 
   ResultObjectVisitor Visitor(Map, LocForRecordReturnVal, *DACtx);
   if (const auto *Ctor = dyn_cast<CXXConstructorDecl>(FuncDecl))
     Visitor.TraverseConstructorInits(Ctor, ThisPointeeLoc);
-  Visitor.TraverseStmt(FuncDecl->getBody());
 
   return Map;
 }
 
+Environment::PrValueToResultObject Environment::buildResultObjectMap(
+    DataflowAnalysisContext *DACtx, Stmt *S,
+    RecordStorageLocation *ThisPointeeLoc,
+    RecordStorageLocation *LocForRecordReturnVal) {
+  PrValueToResultObject Map;
+  ResultObjectVisitor Visitor(Map, LocForRecordReturnVal, *DACtx);
+  Visitor.TraverseStmt(S);
+  return Map;
+}
+
 RecordStorageLocation *getImplicitObjectLocation(const CXXMemberCallExpr &MCE,
                                                  const Environment &Env) {
   Expr *ImplicitObject = MCE.getImplicitObjectArgument();
diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
index 12eff4dd4b781..254007eb80725 100644
--- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
+++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
@@ -476,7 +476,7 @@ runTypeErasedDataflowAnalysis(
   PrettyStackTraceAnalysis CrashInfo(ACFG, "runTypeErasedDataflowAnalysis");
 
   std::optional<Environment> MaybeStartingEnv;
-  if (InitEnv.callStackSize() == 1) {
+  if (InitEnv.targetStackSize() == 1) {
     MaybeStartingEnv = InitEnv.fork();
     MaybeStartingEnv->initialize();
   }
diff --git a/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp b/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp
index 4195648161246..ef0280e74072c 100644
--- a/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp
@@ -9,6 +9,8 @@
 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
 #include "TestingSupport.h"
 #include "clang/AST/DeclCXX.h"
+#include "clang/AST/ExprCXX.h"
+#include "clang/AST/Stmt.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
@@ -403,4 +405,35 @@ TEST_F(EnvironmentTest,
               Contains(Member));
 }
 
+TEST_F(EnvironmentTest, Stmt) {
+  using namespace ast_matchers;
+
+  std::string Code = R"cc(
+      struct S {int i;};
+      void foo() {
+        S AnS = S{1};
+      }
+    )cc";
+  auto Unit =
+      tooling::buildASTFromCodeWithArgs(Code, {"-fsyntax-only", "-std=c++11"});
+  auto &Context = Unit->getASTContext();
+
+  ASSERT_EQ(Context.getDiagnostics().getClient()->getNumErrors(), 0U);
+
+  auto *DeclStatement = const_cast<DeclStmt *>(
+      selectFirst<DeclStmt>("d", match(declStmt().bind("d"), Context)));
+  ASSERT_THAT(DeclStatement, NotNull());
+  auto *ConstructExpr = selectFirst<CXXConstructExpr>(
+      "c", match(cxxConstructExpr().bind("c"), Context));
+  ASSERT_THAT(ConstructExpr, NotNull());
+
+  // Verify that we can retrieve the result object location for the construction
+  // expression when we analyze the DeclStmt for `AnS`.
+  Environment Env(DAContext, *DeclStatement);
+  // Don't crash when initializing.
+  Env.initialize();
+  // And don't crash when retrieving the result object location.
+  Env.getResultObjectLocation(*ConstructExpr);
+}
+
 } // namespace
diff --git a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h
index 3b0e05ed72220..7348f8b1740d0 100644
--- a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h
+++ b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h
@@ -355,8 +355,8 @@ checkDataflow(AnalysisInputs<AnalysisT> AI,
   auto SetupTest = [&StmtToAnnotations,
                     PrevSetupTest = std::move(AI.SetupTest)](
                        AnalysisOutputs &AO) -> llvm::Error {
-    auto MaybeStmtToAnnotations = buildStatementToAnnotationMapping(
-        cast<FunctionDecl>(AO.InitEnv.getDeclCtx()), AO.Code);
+    auto MaybeStmtToAnnotations =
+        buildStatementToAnnotationMapping(AO.InitEnv.getCurrentFunc(), AO.Code);
     if (!MaybeStmtToAnnotations) {
       return MaybeStmtToAnnotations.takeError();
     }

>From 6982564302fb3c9f524ae68c47fa5603d1216e09 Mon Sep 17 00:00:00 2001
From: Samira Bazuzi <bazuzi at google.com>
Date: Thu, 9 May 2024 18:01:07 -0400
Subject: [PATCH 2/3] Add test and crash fix for analysis of Stmt.

---
 .../FlowSensitive/DataflowEnvironment.cpp     |  8 +++----
 .../TypeErasedDataflowAnalysisTest.cpp        | 24 +++++++++++++++++++
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index 41bdeaf0375ef..32547db4fd119 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -773,14 +773,12 @@ Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
   JoinedEnv.LocForRecordReturnVal = EnvA.LocForRecordReturnVal;
   JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc;
 
-  if (EnvA.TargetStack.empty()) {
+  if (EnvA.TargetStack.empty() || !EnvA.getCurrentFunc()) {
     JoinedEnv.ReturnVal = nullptr;
   } else {
-    auto *Func = EnvA.getCurrentFunc();
-    assert(Func != nullptr);
     JoinedEnv.ReturnVal =
-        joinValues(Func->getReturnType(), EnvA.ReturnVal, EnvA, EnvB.ReturnVal,
-                   EnvB, JoinedEnv, Model);
+        joinValues(EnvA.getCurrentFunc()->getReturnType(), EnvA.ReturnVal, EnvA,
+                   EnvB.ReturnVal, EnvB, JoinedEnv, Model);
   }
 
   if (EnvA.ReturnLoc == EnvB.ReturnLoc)
diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
index b0b579d2bc19e..2ddf0a6f66fd4 100644
--- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
@@ -146,6 +146,30 @@ TEST_F(DataflowAnalysisTest, DiagnoseFunctionDiagnoserCalledOnEachElement) {
                                    " (Lifetime ends)\n")));
 }
 
+TEST_F(DataflowAnalysisTest, CanAnalyzeStmt) {
+  std::string Code = R"cc(
+      struct S {int i;};
+      S getAnS() {return S{1};};
+      void foo() {
+        S AnS = getAnS();
+      }
+    )cc";
+  AST = tooling::buildASTFromCodeWithArgs(Code, {"-std=c++11"});
+  const auto &DeclStatement = matchNode<DeclStmt>(declStmt());
+  const auto &Func = matchNode<FunctionDecl>(functionDecl(hasName("foo")));
+
+  ACFG = std::make_unique<AdornedCFG>(llvm::cantFail(AdornedCFG::build(
+      Func, const_cast<DeclStmt &>(DeclStatement), AST->getASTContext())));
+
+  NoopAnalysis Analysis = NoopAnalysis(AST->getASTContext());
+  DACtx = std::make_unique<DataflowAnalysisContext>(
+      std::make_unique<WatchedLiteralsSolver>());
+  Environment Env(*DACtx, const_cast<DeclStmt &>(DeclStatement));
+
+  ASSERT_THAT_ERROR(runDataflowAnalysis(*ACFG, Analysis, Env).takeError(),
+                    llvm::Succeeded());
+}
+
 // Tests for the statement-to-block map.
 using StmtToBlockTest = DataflowAnalysisTest;
 

>From 112e23c593b184f24f0a4c34755f4c183a1a1f08 Mon Sep 17 00:00:00 2001
From: Samira Bazuzi <bazuzi at google.com>
Date: Fri, 10 May 2024 11:00:17 -0400
Subject: [PATCH 3/3] Store the initial analysis target outside of the
 CallStack.

Reverts TargetStack to CallStack and stores the initial target statement
and, when present, its containing FunctionDecl separately from
CallStack.
---
 .../FlowSensitive/DataflowEnvironment.h       | 57 +++++++++--------
 .../FlowSensitive/DataflowEnvironment.cpp     | 61 +++++++++----------
 .../TypeErasedDataflowAnalysis.cpp            |  2 +-
 3 files changed, 62 insertions(+), 58 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
index dfc3f6a35e4e0..83ce1fa47b586 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -29,13 +29,13 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/PointerUnion.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <cassert>
 #include <memory>
 #include <type_traits>
 #include <utility>
+#include <vector>
 
 namespace clang {
 namespace dataflow {
@@ -169,25 +169,28 @@ class Environment {
   Environment &operator=(Environment &&Other) = default;
 
   /// Creates an environment that uses `DACtx` to store objects that encompass
-  /// the state of a program, with `FD` as the current analysis target.
-  Environment(DataflowAnalysisContext &DACtx, const FunctionDecl &FD)
-      : Environment(DACtx) {
-    TargetStack.push_back(&FD);
+  /// the state of a program, with `S` as the initial analysis target.
+  Environment(DataflowAnalysisContext &DACtx, Stmt &S) : Environment(DACtx) {
+    InitialTargetStmt = &S;
   }
 
   /// Creates an environment that uses `DACtx` to store objects that encompass
-  /// the state of a program, with `S` as the current analysis target.
-  Environment(DataflowAnalysisContext &DACtx, Stmt &S) : Environment(DACtx) {
-    TargetStack.push_back(&S);
+  /// the state of a program, with `FD` as the initial analysis target.
+  ///
+  /// Requirements:
+  ///
+  ///  The function must have a body, i.e.
+  ///  `FunctionDecl::doesThisDecalarationHaveABody()` must be true.
+  Environment(DataflowAnalysisContext &DACtx, const FunctionDecl &FD)
+      : Environment(DACtx, *FD.getBody()) {
+    InitialTargetFunc = &FD;
   }
 
   /// Assigns storage locations and values to all parameters, captures, global
-  /// variables, fields and functions referenced in the target currently
-  /// being analyzed.
+  /// variables, fields and functions referenced in the initial analysis target.
   ///
-  /// If the current analysis target is a non-static member function,
-  /// initializes the environment with a symbolic representation of the `this`
-  /// pointee.
+  /// If the target is a non-static member function, initializes the environment
+  /// with a symbolic representation of the `this` pointee.
   void initialize();
 
   /// Returns a new environment that is a copy of this one.
@@ -200,7 +203,7 @@ class Environment {
   /// forked flow condition references the original).
   Environment fork() const;
 
-  /// Creates and returns an environment to use for an inline analysis  of the
+  /// Creates and returns an environment to use for an inline analysis of the
   /// callee. Uses the storage location from each argument in the `Call` as the
   /// storage location for the corresponding parameter in the callee.
   ///
@@ -653,20 +656,14 @@ class Environment {
   /// (or the flow condition is overly constraining) or if the solver times out.
   bool allows(const Formula &) const;
 
-  /// Returns the current analysis target.
-  llvm::PointerUnion<const FunctionDecl *, Stmt *> getCurrentTarget() const {
-    return TargetStack.back();
-  }
-
   /// Returns the function currently being analyzed, or null if the code being
   /// analyzed isn't part of a function.
   const FunctionDecl *getCurrentFunc() const {
-    return getCurrentTarget().dyn_cast<const FunctionDecl *>();
+    return CallStack.empty() ? InitialTargetFunc : CallStack.back();
   }
 
-  /// Returns the size of the target stack. All but the first item in the stack
-  /// are expected to be function calls.
-  size_t targetStackSize() const { return TargetStack.size(); }
+  /// Returns the size of the call stack.
+  size_t callStackSize() const { return CallStack.size(); }
 
   /// Returns whether this `Environment` can be extended to analyze the given
   /// `Callee` (i.e. if `pushCall` can be used), with recursion disallowed and a
@@ -785,9 +782,17 @@ class Environment {
   // shared between environments in the same call.
   // https://github.com/llvm/llvm-project/issues/59005
 
-  // The stack of statements or functions being analyzed. Only the first item in
-  // the stack is expected to ever be a `Stmt *`.
-  std::vector<llvm::PointerUnion<const FunctionDecl *, Stmt *>> TargetStack;
+  // The stack of functions called from the initial analysis target and
+  // recursively being analyzed.
+  std::vector<const FunctionDecl *> CallStack;
+
+  // If the initial analysis target is a function, this is the function
+  // declaration. Otherwise, this is null.
+  const FunctionDecl *InitialTargetFunc = nullptr;
+  // If the initial analysis target is a function, this is its body. If the
+  // initial analysis target was not provided, this is null. Otherwise, this is
+  // the initial analysis target.
+  Stmt *InitialTargetStmt = nullptr;
 
   // Maps from prvalues of record type to their result objects. Shared between
   // all environments for the same analysis target.
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index 32547db4fd119..8c2d5dc2f2f01 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -520,33 +520,30 @@ class ResultObjectVisitor : public RecursiveASTVisitor<ResultObjectVisitor> {
 } // namespace
 
 void Environment::initialize() {
-  llvm::PointerUnion<const FunctionDecl *, Stmt *> Target = getCurrentTarget();
-  if (Target.isNull())
+  if (InitialTargetStmt == nullptr)
     return;
 
-  if (Stmt *S = Target.dyn_cast<Stmt *>()) {
-    initFieldsGlobalsAndFuncs(S);
+  if (InitialTargetFunc == nullptr) {
+    initFieldsGlobalsAndFuncs(InitialTargetStmt);
     ResultObjectMap =
         std::make_shared<PrValueToResultObject>(buildResultObjectMap(
-            DACtx, S, getThisPointeeStorageLocation(), LocForRecordReturnVal));
+            DACtx, InitialTargetStmt, getThisPointeeStorageLocation(),
+            LocForRecordReturnVal));
     return;
   }
 
-  const auto *FuncDecl = Target.get<const FunctionDecl *>();
-  assert(FuncDecl->doesThisDeclarationHaveABody());
-
-  initFieldsGlobalsAndFuncs(FuncDecl);
+  initFieldsGlobalsAndFuncs(InitialTargetFunc);
 
-  for (const auto *ParamDecl : FuncDecl->parameters()) {
+  for (const auto *ParamDecl : InitialTargetFunc->parameters()) {
     assert(ParamDecl != nullptr);
     setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr));
   }
 
-  if (FuncDecl->getReturnType()->isRecordType())
+  if (InitialTargetFunc->getReturnType()->isRecordType())
     LocForRecordReturnVal = &cast<RecordStorageLocation>(
-        createStorageLocation(FuncDecl->getReturnType()));
+        createStorageLocation(InitialTargetFunc->getReturnType()));
 
-  if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(FuncDecl)) {
+  if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(InitialTargetFunc)) {
     auto *Parent = MethodDecl->getParent();
     assert(Parent != nullptr);
 
@@ -558,7 +555,7 @@ void Environment::initialize() {
           setStorageLocation(*VarDecl, createObject(*VarDecl, nullptr));
         } else if (Capture.capturesThis()) {
           const auto *SurroundingMethodDecl =
-              cast<CXXMethodDecl>(FuncDecl->getNonClosureAncestor());
+              cast<CXXMethodDecl>(InitialTargetFunc->getNonClosureAncestor());
           QualType ThisPointeeType =
               SurroundingMethodDecl->getFunctionObjectParameterType();
           setThisPointeeStorageLocation(
@@ -580,9 +577,10 @@ void Environment::initialize() {
 
   // We do this below the handling of `CXXMethodDecl` above so that we can
   // be sure that the storage location for `this` has been set.
-  ResultObjectMap = std::make_shared<PrValueToResultObject>(
-      buildResultObjectMap(DACtx, FuncDecl, getThisPointeeStorageLocation(),
-                           LocForRecordReturnVal));
+  ResultObjectMap =
+      std::make_shared<PrValueToResultObject>(buildResultObjectMap(
+          DACtx, InitialTargetFunc, getThisPointeeStorageLocation(),
+          LocForRecordReturnVal));
 }
 
 Environment Environment::fork() const {
@@ -593,13 +591,7 @@ Environment Environment::fork() const {
 
 bool Environment::canDescend(unsigned MaxDepth,
                              const FunctionDecl *Callee) const {
-  return TargetStack.size() <= MaxDepth &&
-         !std::any_of(
-             TargetStack.begin(), TargetStack.end(),
-             [Callee](const llvm::PointerUnion<const FunctionDecl *, Stmt *>
-                          &Target) {
-               return Callee == Target.dyn_cast<const FunctionDecl *>();
-             });
+  return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee);
 }
 
 Environment Environment::pushCall(const CallExpr *Call) const {
@@ -644,7 +636,7 @@ void Environment::pushCallInternal(const FunctionDecl *FuncDecl,
   assert(FuncDecl->getDefinition() != nullptr);
   FuncDecl = FuncDecl->getDefinition();
 
-  TargetStack.push_back(FuncDecl);
+  CallStack.push_back(FuncDecl);
 
   initFieldsGlobalsAndFuncs(FuncDecl);
 
@@ -728,8 +720,10 @@ LatticeEffect Environment::widen(const Environment &PrevEnv,
   assert(ReturnLoc == PrevEnv.ReturnLoc);
   assert(LocForRecordReturnVal == PrevEnv.LocForRecordReturnVal);
   assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc);
-  assert(TargetStack == PrevEnv.TargetStack);
+  assert(CallStack == PrevEnv.CallStack);
   assert(ResultObjectMap == PrevEnv.ResultObjectMap);
+  assert(InitialTargetFunc == PrevEnv.InitialTargetFunc);
+  assert(InitialTargetStmt == PrevEnv.InitialTargetStmt);
 
   auto Effect = LatticeEffect::Unchanged;
 
@@ -763,22 +757,27 @@ Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
   assert(EnvA.DACtx == EnvB.DACtx);
   assert(EnvA.LocForRecordReturnVal == EnvB.LocForRecordReturnVal);
   assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc);
-  assert(EnvA.TargetStack == EnvB.TargetStack);
+  assert(EnvA.CallStack == EnvB.CallStack);
   assert(EnvA.ResultObjectMap == EnvB.ResultObjectMap);
+  assert(EnvA.InitialTargetFunc == EnvB.InitialTargetFunc);
+  assert(EnvA.InitialTargetStmt == EnvB.InitialTargetStmt);
 
   Environment JoinedEnv(*EnvA.DACtx);
 
-  JoinedEnv.TargetStack = EnvA.TargetStack;
+  JoinedEnv.CallStack = EnvA.CallStack;
   JoinedEnv.ResultObjectMap = EnvA.ResultObjectMap;
   JoinedEnv.LocForRecordReturnVal = EnvA.LocForRecordReturnVal;
   JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc;
+  JoinedEnv.InitialTargetFunc = EnvA.InitialTargetFunc;
+  JoinedEnv.InitialTargetStmt = EnvA.InitialTargetStmt;
 
-  if (EnvA.TargetStack.empty() || !EnvA.getCurrentFunc()) {
+  auto *Func = EnvA.getCurrentFunc();
+  if (!Func) {
     JoinedEnv.ReturnVal = nullptr;
   } else {
     JoinedEnv.ReturnVal =
-        joinValues(EnvA.getCurrentFunc()->getReturnType(), EnvA.ReturnVal, EnvA,
-                   EnvB.ReturnVal, EnvB, JoinedEnv, Model);
+        joinValues(Func->getReturnType(), EnvA.ReturnVal, EnvA, EnvB.ReturnVal,
+                   EnvB, JoinedEnv, Model);
   }
 
   if (EnvA.ReturnLoc == EnvB.ReturnLoc)
diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
index 254007eb80725..b923bfbf78176 100644
--- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
+++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
@@ -476,7 +476,7 @@ runTypeErasedDataflowAnalysis(
   PrettyStackTraceAnalysis CrashInfo(ACFG, "runTypeErasedDataflowAnalysis");
 
   std::optional<Environment> MaybeStartingEnv;
-  if (InitEnv.targetStackSize() == 1) {
+  if (InitEnv.callStackSize() == 0 && InitEnv.getCurrentFunc()) {
     MaybeStartingEnv = InitEnv.fork();
     MaybeStartingEnv->initialize();
   }



More information about the cfe-commits mailing list