[clang] [clang][dataflow] Fully support Environment construction for Stmt analysis. (PR #91616)

via cfe-commits cfe-commits at lists.llvm.org
Thu May 9 09:13:34 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-analysis

@llvm/pr-subscribers-clang

Author: Samira Bazuzi (bazuzi)

<details>
<summary>Changes</summary>

Assume in fewer places that the analysis is of a FunctionDecl, and initialize the Environment properly for Stmts.

Moves constructors for Environment to header to make it more obvious that there are only minor differences between them and very little initialization in the constructors.

Tested with check-clang-tooling.

---

Patch is 24.27 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/91616.diff


5 Files Affected:

- (modified) clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h (+92-41) 
- (modified) clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp (+48-65) 
- (modified) clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp (+1-1) 
- (modified) clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp (+33) 
- (modified) clang/unittests/Analysis/FlowSensitive/TestingSupport.h (+2-2) 


``````````diff
diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
index cdf89c7def2c..dfc3f6a35e4e 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -19,6 +19,7 @@
 #include "clang/AST/DeclBase.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/Type.h"
+#include "clang/Analysis/FlowSensitive/ASTOps.h"
 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
 #include "clang/Analysis/FlowSensitive/Formula.h"
@@ -28,8 +29,10 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PointerUnion.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
+#include <cassert>
 #include <memory>
 #include <type_traits>
 #include <utility>
@@ -155,7 +158,9 @@ class Environment {
 
   /// Creates an environment that uses `DACtx` to store objects that encompass
   /// the state of a program.
-  explicit Environment(DataflowAnalysisContext &DACtx);
+  explicit Environment(DataflowAnalysisContext &DACtx)
+      : DACtx(&DACtx),
+        FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {}
 
   // Copy-constructor is private, Environments should not be copied. See fork().
   Environment &operator=(const Environment &Other) = delete;
@@ -164,23 +169,25 @@ class Environment {
   Environment &operator=(Environment &&Other) = default;
 
   /// Creates an environment that uses `DACtx` to store objects that encompass
-  /// the state of a program.
-  ///
-  /// If `DeclCtx` is a function, initializes the environment with symbolic
-  /// representations of the function parameters.
-  ///
-  /// If `DeclCtx` is a non-static member function, initializes the environment
-  /// with a symbolic representation of the `this` pointee.
-  Environment(DataflowAnalysisContext &DACtx, const DeclContext &DeclCtx);
+  /// the state of a program, with `FD` as the current analysis target.
+  Environment(DataflowAnalysisContext &DACtx, const FunctionDecl &FD)
+      : Environment(DACtx) {
+    TargetStack.push_back(&FD);
+  }
+
+  /// Creates an environment that uses `DACtx` to store objects that encompass
+  /// the state of a program, with `S` as the current analysis target.
+  Environment(DataflowAnalysisContext &DACtx, Stmt &S) : Environment(DACtx) {
+    TargetStack.push_back(&S);
+  }
 
   /// Assigns storage locations and values to all parameters, captures, global
-  /// variables, fields and functions referenced in the function currently being
-  /// analyzed.
+  /// variables, fields and functions referenced in the target currently
+  /// being analyzed.
   ///
-  /// Requirements:
-  ///
-  ///  The function must have a body, i.e.
-  ///  `FunctionDecl::doesThisDecalarationHaveABody()` must be true.
+  /// If the current analysis target is a non-static member function,
+  /// initializes the environment with a symbolic representation of the `this`
+  /// pointee.
   void initialize();
 
   /// Returns a new environment that is a copy of this one.
@@ -365,46 +372,51 @@ class Environment {
   RecordStorageLocation &
   getResultObjectLocation(const Expr &RecordPRValue) const;
 
-  /// Returns the return value of the current function. This can be null if:
+  /// Returns the return value of the function currently being analyzed.
+  /// This can be null if:
   /// - The function has a void return type
   /// - No return value could be determined for the function, for example
   ///   because it calls a function without a body.
   ///
   /// Requirements:
-  ///  The current function must have a non-reference return type.
+  ///  The current analysis target must be a function and must have a
+  ///  non-reference return type.
   Value *getReturnValue() const {
     assert(getCurrentFunc() != nullptr &&
            !getCurrentFunc()->getReturnType()->isReferenceType());
     return ReturnVal;
   }
 
-  /// Returns the storage location for the reference returned by the current
-  /// function. This can be null if function doesn't return a single consistent
-  /// reference.
+  /// Returns the storage location for the reference returned by the function
+  /// currently being analyzed. This can be null if the function doesn't return
+  /// a single consistent reference.
   ///
   /// Requirements:
-  ///  The current function must have a reference return type.
+  ///  The current analysis target must be a function and must have a reference
+  ///  return type.
   StorageLocation *getReturnStorageLocation() const {
     assert(getCurrentFunc() != nullptr &&
            getCurrentFunc()->getReturnType()->isReferenceType());
     return ReturnLoc;
   }
 
-  /// Sets the return value of the current function.
+  /// Sets the return value of the function currently being analyzed.
   ///
   /// Requirements:
-  ///  The current function must have a non-reference return type.
+  ///  The current analysis target must be a function and must have a
+  ///  non-reference return type.
   void setReturnValue(Value *Val) {
     assert(getCurrentFunc() != nullptr &&
            !getCurrentFunc()->getReturnType()->isReferenceType());
     ReturnVal = Val;
   }
 
-  /// Sets the storage location for the reference returned by the current
-  /// function.
+  /// Sets the storage location for the reference returned by the function
+  /// currently being analyzed.
   ///
   /// Requirements:
-  ///  The current function must have a reference return type.
+  ///  The current analysis target must be a function and must have a reference
+  ///  return type.
   void setReturnStorageLocation(StorageLocation *Loc) {
     assert(getCurrentFunc() != nullptr &&
            getCurrentFunc()->getReturnType()->isReferenceType());
@@ -641,23 +653,25 @@ class Environment {
   /// (or the flow condition is overly constraining) or if the solver times out.
   bool allows(const Formula &) const;
 
-  /// Returns the `DeclContext` of the block being analysed, if any. Otherwise,
-  /// returns null.
-  const DeclContext *getDeclCtx() const { return CallStack.back(); }
+  /// Returns the current analysis target.
+  llvm::PointerUnion<const FunctionDecl *, Stmt *> getCurrentTarget() const {
+    return TargetStack.back();
+  }
 
   /// Returns the function currently being analyzed, or null if the code being
   /// analyzed isn't part of a function.
   const FunctionDecl *getCurrentFunc() const {
-    return dyn_cast<FunctionDecl>(getDeclCtx());
+    return getCurrentTarget().dyn_cast<const FunctionDecl *>();
   }
 
-  /// Returns the size of the call stack.
-  size_t callStackSize() const { return CallStack.size(); }
+  /// Returns the size of the target stack. All but the first item in the stack
+  /// are expected to be function calls.
+  size_t targetStackSize() const { return TargetStack.size(); }
 
   /// Returns whether this `Environment` can be extended to analyze the given
   /// `Callee` (i.e. if `pushCall` can be used), with recursion disallowed and a
   /// given `MaxDepth`.
-  bool canDescend(unsigned MaxDepth, const DeclContext *Callee) const;
+  bool canDescend(unsigned MaxDepth, const FunctionDecl *Callee) const;
 
   /// Returns the `DataflowAnalysisContext` used by the environment.
   DataflowAnalysisContext &getDataflowAnalysisContext() const { return *DACtx; }
@@ -718,9 +732,39 @@ class Environment {
   void pushCallInternal(const FunctionDecl *FuncDecl,
                         ArrayRef<const Expr *> Args);
 
+  // FIXME: Add support for resetting globals after function calls to enable
+  // the implementation of sound analyses.
+
   /// Assigns storage locations and values to all global variables, fields
-  /// and functions referenced in `FuncDecl`. `FuncDecl` must have a body.
-  void initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl);
+  /// and functions referenced in `Source`.
+  template <typename FuncOrStmt>
+  void initFieldsGlobalsAndFuncs(const FuncOrStmt *Source) {
+
+    ReferencedDecls Referenced = getReferencedDecls(*Source);
+
+    // These have to be added before the lines that follow to ensure that
+    // `create*` work correctly for structs.
+    DACtx->addModeledFields(Referenced.Fields);
+
+    for (const VarDecl *D : Referenced.Globals) {
+      if (getStorageLocation(*D) != nullptr)
+        continue;
+
+      // We don't run transfer functions on the initializers of global
+      // variables, so they won't be associated with a value or storage
+      // location. We therefore intentionally don't pass an initializer to
+      // `createObject()`; in particular, this ensures that `createObject()`
+      // will initialize the fields of record-type variables with values.
+      setStorageLocation(*D, createObject(*D, nullptr));
+    }
+
+    for (const FunctionDecl *FD : Referenced.Functions) {
+      if (getStorageLocation(*FD) != nullptr)
+        continue;
+      auto &Loc = createStorageLocation(*FD);
+      setStorageLocation(*FD, Loc);
+    }
+  }
 
   static PrValueToResultObject
   buildResultObjectMap(DataflowAnalysisContext *DACtx,
@@ -728,19 +772,25 @@ class Environment {
                        RecordStorageLocation *ThisPointeeLoc,
                        RecordStorageLocation *LocForRecordReturnVal);
 
+  static PrValueToResultObject
+  buildResultObjectMap(DataflowAnalysisContext *DACtx, Stmt *S,
+                       RecordStorageLocation *ThisPointeeLoc,
+                       RecordStorageLocation *LocForRecordReturnVal);
+
   // `DACtx` is not null and not owned by this object.
   DataflowAnalysisContext *DACtx;
 
-  // FIXME: move the fields `CallStack`, `ResultObjectMap`, `ReturnVal`,
+  // FIXME: move the fields `TargetStack`, `ResultObjectMap`, `ReturnVal`,
   // `ReturnLoc` and `ThisPointeeLoc` into a separate call-context object,
   // shared between environments in the same call.
   // https://github.com/llvm/llvm-project/issues/59005
 
-  // `DeclContext` of the block being analysed if provided.
-  std::vector<const DeclContext *> CallStack;
+  // The stack of statements or functions being analyzed. Only the first item in
+  // the stack is expected to ever be a `Stmt *`.
+  std::vector<llvm::PointerUnion<const FunctionDecl *, Stmt *>> TargetStack;
 
   // Maps from prvalues of record type to their result objects. Shared between
-  // all environments for the same function.
+  // all environments for the same analysis target.
   // FIXME: It's somewhat unsatisfactory that we have to use a `shared_ptr`
   // here, though the cost is acceptable: The overhead of a `shared_ptr` is
   // incurred when it is copied, and this happens only relatively rarely (when
@@ -749,7 +799,7 @@ class Environment {
   std::shared_ptr<PrValueToResultObject> ResultObjectMap;
 
   // The following three member variables handle various different types of
-  // return values.
+  // return values when the current analysis target is a function.
   // - If the return type is not a reference and not a record: Value returned
   //   by the function.
   Value *ReturnVal = nullptr;
@@ -762,7 +812,8 @@ class Environment {
   RecordStorageLocation *LocForRecordReturnVal = nullptr;
 
   // The storage location of the `this` pointee. Should only be null if the
-  // function being analyzed is only a function and not a method.
+  // analysis target is not a function or is a function but not a
+  // method.
   RecordStorageLocation *ThisPointeeLoc = nullptr;
 
   // Maps from declarations and glvalue expression to storage locations that are
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index cb6c8b2ef107..41bdeaf0375e 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -16,17 +16,22 @@
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/Stmt.h"
 #include "clang/AST/Type.h"
 #include "clang/Analysis/FlowSensitive/ASTOps.h"
+#include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
 #include "clang/Analysis/FlowSensitive/Value.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PointerUnion.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
 #include <cassert>
+#include <memory>
 #include <utility>
 
 #define DEBUG_TYPE "dataflow"
@@ -290,15 +295,15 @@ widenKeyToValueMap(const llvm::MapVector<Key, Value *> &CurMap,
 namespace {
 
 // Visitor that builds a map from record prvalues to result objects.
-// This traverses the body of the function to be analyzed; for each result
-// object that it encounters, it propagates the storage location of the result
-// object to all record prvalues that can initialize it.
+// This traverses the statement to be analyzed; for each result object that it
+// encounters, it propagates the storage location of the result object to all
+// record prvalues that can initialize it.
 class ResultObjectVisitor : public RecursiveASTVisitor<ResultObjectVisitor> {
 public:
   // `ResultObjectMap` will be filled with a map from record prvalues to result
-  // object. If the function being analyzed returns a record by value,
-  // `LocForRecordReturnVal` is the location to which this record should be
-  // written; otherwise, it is null.
+  // object. If the statement being analyzed is a function body and returns a
+  // record by value, `LocForRecordReturnVal` is the location to which this
+  // record should be written; otherwise, it is null.
   explicit ResultObjectVisitor(
       llvm::DenseMap<const Expr *, RecordStorageLocation *> &ResultObjectMap,
       RecordStorageLocation *LocForRecordReturnVal,
@@ -514,25 +519,20 @@ class ResultObjectVisitor : public RecursiveASTVisitor<ResultObjectVisitor> {
 
 } // namespace
 
-Environment::Environment(DataflowAnalysisContext &DACtx)
-    : DACtx(&DACtx),
-      FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {}
-
-Environment::Environment(DataflowAnalysisContext &DACtx,
-                         const DeclContext &DeclCtx)
-    : Environment(DACtx) {
-  CallStack.push_back(&DeclCtx);
-}
-
 void Environment::initialize() {
-  const DeclContext *DeclCtx = getDeclCtx();
-  if (DeclCtx == nullptr)
+  llvm::PointerUnion<const FunctionDecl *, Stmt *> Target = getCurrentTarget();
+  if (Target.isNull())
     return;
 
-  const auto *FuncDecl = dyn_cast<FunctionDecl>(DeclCtx);
-  if (FuncDecl == nullptr)
+  if (Stmt *S = Target.dyn_cast<Stmt *>()) {
+    initFieldsGlobalsAndFuncs(S);
+    ResultObjectMap =
+        std::make_shared<PrValueToResultObject>(buildResultObjectMap(
+            DACtx, S, getThisPointeeStorageLocation(), LocForRecordReturnVal));
     return;
+  }
 
+  const auto *FuncDecl = Target.get<const FunctionDecl *>();
   assert(FuncDecl->doesThisDeclarationHaveABody());
 
   initFieldsGlobalsAndFuncs(FuncDecl);
@@ -546,7 +546,7 @@ void Environment::initialize() {
     LocForRecordReturnVal = &cast<RecordStorageLocation>(
         createStorageLocation(FuncDecl->getReturnType()));
 
-  if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(DeclCtx)) {
+  if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(FuncDecl)) {
     auto *Parent = MethodDecl->getParent();
     assert(Parent != nullptr);
 
@@ -558,7 +558,7 @@ void Environment::initialize() {
           setStorageLocation(*VarDecl, createObject(*VarDecl, nullptr));
         } else if (Capture.capturesThis()) {
           const auto *SurroundingMethodDecl =
-              cast<CXXMethodDecl>(DeclCtx->getNonClosureAncestor());
+              cast<CXXMethodDecl>(FuncDecl->getNonClosureAncestor());
           QualType ThisPointeeType =
               SurroundingMethodDecl->getFunctionObjectParameterType();
           setThisPointeeStorageLocation(
@@ -585,37 +585,6 @@ void Environment::initialize() {
                            LocForRecordReturnVal));
 }
 
-// FIXME: Add support for resetting globals after function calls to enable
-// the implementation of sound analyses.
-void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) {
-  assert(FuncDecl->doesThisDeclarationHaveABody());
-
-  ReferencedDecls Referenced = getReferencedDecls(*FuncDecl);
-
-  // These have to be added before the lines that follow to ensure that
-  // `create*` work correctly for structs.
-  DACtx->addModeledFields(Referenced.Fields);
-
-  for (const VarDecl *D : Referenced.Globals) {
-    if (getStorageLocation(*D) != nullptr)
-      continue;
-
-    // We don't run transfer functions on the initializers of global variables,
-    // so they won't be associated with a value or storage location. We
-    // therefore intentionally don't pass an initializer to `createObject()`;
-    // in particular, this ensures that `createObject()` will initialize the
-    // fields of record-type variables with values.
-    setStorageLocation(*D, createObject(*D, nullptr));
-  }
-
-  for (const FunctionDecl *FD : Referenced.Functions) {
-    if (getStorageLocation(*FD) != nullptr)
-      continue;
-    auto &Loc = createStorageLocation(*FD);
-    setStorageLocation(*FD, Loc);
-  }
-}
-
 Environment Environment::fork() const {
   Environment Copy(*this);
   Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken);
@@ -623,8 +592,14 @@ Environment Environment::fork() const {
 }
 
 bool Environment::canDescend(unsigned MaxDepth,
-                             const DeclContext *Callee) const {
-  return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee);
+                             const FunctionDecl *Callee) const {
+  return TargetStack.size() <= MaxDepth &&
+         !std::any_of(
+             TargetStack.begin(), TargetStack.end(),
+             [Callee](const llvm::PointerUnion<const FunctionDecl *, Stmt *>
+                          &Target) {
+               return Callee == Target.dyn_cast<const FunctionDecl *>();
+             });
 }
 
 Environment Environment::pushCall(const CallExpr *Call) const {
@@ -669,7 +644,7 @@ void Environment::pushCallInternal(const FunctionDecl *FuncDecl,
   assert(FuncDecl->getDefinition() != nullptr);
   FuncDecl = FuncDecl->getDefinition();
 
-  CallStack.push_back(FuncDecl);
+  TargetStack.push_back(FuncDecl);
 
   initFieldsGlobalsAndFuncs(FuncDecl);
 
@@ -753,7 +728,7 @@ LatticeEffect Environment::widen(const Environment &PrevEnv,
   assert(ReturnLoc == PrevEnv.ReturnLoc);
   assert(LocForRecordReturnVal == PrevEnv.LocForRecordReturnVal);
   assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc);
-  assert(CallStack == PrevEnv.CallStack);
+  assert(TargetStack == PrevEnv.TargetStack);
   assert(ResultObjectMap == PrevEnv.ResultObjectMap);
 
   auto Effect = LatticeEffect::Unchanged;
@@ -788,22 +763,20 @@ Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
   assert(EnvA.DACtx == EnvB.DACtx);
   assert(EnvA.LocForRecordReturnVal == EnvB.LocForRecordReturnVal);
   assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc);
-  assert(EnvA.CallStack == EnvB.CallStack);
+  assert(EnvA.TargetStack == EnvB.TargetStack);
   assert(EnvA.ResultObjectMap == EnvB.ResultObjectMap);
 
   Environment JoinedEnv(*EnvA.DACtx);
 
-  JoinedEnv.CallStack = EnvA.CallStack;
+  JoinedEnv.TargetStack = EnvA.TargetStack;
   JoinedEnv.ResultObjectMap = EnvA.ResultObjectMap;
   JoinedEnv.LocForRecordReturnVal = EnvA.LocForRecordReturnVal;
   JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc;
 
-  if (EnvA.CallStack.empty()) {
+  if (EnvA.TargetStack.empty()) {
     JoinedEnv.ReturnVal = nullptr;
   } else {
-    // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this
-    // cast.
-    auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back());
+    auto *Func = EnvA.getCurrentFunc();
     assert(Func != nullptr);
     JoinedEnv.ReturnVal =
         joinValues(Func->getReturnType(), EnvA.ReturnVal, EnvA, EnvB.ReturnVal,
@@ -1229,16 +1202,26 @@ Environment::PrValueToResultObject Environment::buildResultObjectMap(
     RecordStorageLocation *LocForRecordReturnVal) {
   assert(FuncDecl->doesThisDeclarationHaveABody());
 
-  PrValueToResultObject Map;
+  ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/91616


More information about the cfe-commits mailing list