[clang] [clang][dataflow] Rewrite `getReferencedDecls()` with a `RecursiveASTVisitor`. (PR #93461)

via cfe-commits cfe-commits at lists.llvm.org
Mon May 27 05:06:49 PDT 2024


https://github.com/martinboehme created https://github.com/llvm/llvm-project/pull/93461

We previously had a hand-rolled recursive traversal here that was exactly what
`RecursiveASTVistor` does anyway. Using the visitor not only eliminates the
explicit traversal logic but also allows us to introduce a common visitor base
class for `getReferencedDecls()` and `ResultObjectVisitor`, ensuring that the
two are consistent in terms of the nodes they visit. Inconsistency between these
two has caused crashes in the past when `ResultObjectVisitor` tried to propagate
result object locations to entities that weren't modeled becasue
`getReferencedDecls()` didn't visit them.


>From 463f458334e2941c46ae18dbcdf075218955cd49 Mon Sep 17 00:00:00 2001
From: Martin Braenne <mboehme at google.com>
Date: Mon, 27 May 2024 12:05:40 +0000
Subject: [PATCH] [clang][dataflow] Rewrite `getReferencedDecls()` with a
 `RecursiveASTVisitor`.

We previously had a hand-rolled recursive traversal here that was exactly what
`RecursiveASTVistor` does anyway. Using the visitor not only eliminates the
explicit traversal logic but also allows us to introduce a common visitor base
class for `getReferencedDecls()` and `ResultObjectVisitor`, ensuring that the
two are consistent in terms of the nodes they visit. Inconsistency between these
two has caused crashes in the past when `ResultObjectVisitor` tried to propagate
result object locations to entities that weren't modeled becasue
`getReferencedDecls()` didn't visit them.
---
 .../clang/Analysis/FlowSensitive/ASTOps.h     |  47 +++++++
 clang/lib/Analysis/FlowSensitive/ASTOps.cpp   | 115 ++++++++++--------
 .../FlowSensitive/DataflowEnvironment.cpp     |  37 +-----
 3 files changed, 109 insertions(+), 90 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/ASTOps.h b/clang/include/clang/Analysis/FlowSensitive/ASTOps.h
index 05748f300a932..925b99af9141a 100644
--- a/clang/include/clang/Analysis/FlowSensitive/ASTOps.h
+++ b/clang/include/clang/Analysis/FlowSensitive/ASTOps.h
@@ -15,6 +15,7 @@
 
 #include "clang/AST/Decl.h"
 #include "clang/AST/Expr.h"
+#include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Type.h"
 #include "clang/Analysis/FlowSensitive/StorageLocation.h"
 #include "llvm/ADT/DenseSet.h"
@@ -80,6 +81,52 @@ class RecordInitListHelper {
   std::optional<ImplicitValueInitExpr> ImplicitValueInitForUnion;
 };
 
+/// Specialization of `RecursiveASTVisitor` that visits those nodes that are
+/// relevant to the dataflow analysis; generally, these are the ones that also
+/// appear in the CFG.
+/// To start the traversal, call `TraverseStmt()` on the statement or body of
+/// the function to analyze. Don't call `TraverseDecl()` on the function itself;
+/// this won't work as `TraverseDecl()` contains code to avoid traversing nested
+/// functions.
+template <class Derived>
+class AnalysisASTVisitor : public RecursiveASTVisitor<Derived> {
+public:
+  bool shouldVisitImplicitCode() { return true; }
+
+  bool shouldVisitLambdaBody() const { return false; }
+
+  bool TraverseDecl(Decl *D) {
+    // Don't traverse nested record or function declarations.
+    // - We won't be analyzing code contained in these anyway
+    // - We don't model fields that are used only in these nested declaration,
+    //   so trying to propagate a result object to initializers of such fields
+    //   would cause an error.
+    if (isa_and_nonnull<RecordDecl>(D) || isa_and_nonnull<FunctionDecl>(D))
+      return true;
+
+    return RecursiveASTVisitor<Derived>::TraverseDecl(D);
+  }
+
+  // Don't traverse expressions in unevaluated contexts, as we don't model
+  // fields that are only used in these.
+  // Note: The operand of the `noexcept` operator is an unevaluated operand, but
+  // nevertheless it appears in the Clang CFG, so we don't exclude it here.
+  bool TraverseDecltypeTypeLoc(DecltypeTypeLoc) { return true; }
+  bool TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc) { return true; }
+  bool TraverseCXXTypeidExpr(CXXTypeidExpr *) { return true; }
+  bool TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *) {
+    return true;
+  }
+
+  bool TraverseBindingDecl(BindingDecl *BD) {
+    // `RecursiveASTVisitor` doesn't traverse holding variables for
+    // `BindingDecl`s by itself, so we need to tell it to.
+    if (VarDecl *HoldingVar = BD->getHoldingVar())
+      TraverseDecl(HoldingVar);
+    return RecursiveASTVisitor<Derived>::TraverseBindingDecl(BD);
+  }
+};
+
 /// A collection of several types of declarations, all referenced from the same
 /// function.
 struct ReferencedDecls {
diff --git a/clang/lib/Analysis/FlowSensitive/ASTOps.cpp b/clang/lib/Analysis/FlowSensitive/ASTOps.cpp
index bd1676583eccc..5a7a1077931da 100644
--- a/clang/lib/Analysis/FlowSensitive/ASTOps.cpp
+++ b/clang/lib/Analysis/FlowSensitive/ASTOps.cpp
@@ -188,90 +188,97 @@ static MemberExpr *getMemberForAccessor(const CXXMemberCallExpr &C) {
   return nullptr;
 }
 
-static void getReferencedDecls(const Decl &D, ReferencedDecls &Referenced) {
-  insertIfGlobal(D, Referenced.Globals);
-  insertIfFunction(D, Referenced.Functions);
-  if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D))
-    for (const auto *B : Decomp->bindings())
-      if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding()))
-        // FIXME: should we be using `E->getFoundDecl()`?
-        if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl()))
-          Referenced.Fields.insert(FD);
-}
+class ReferencedDeclsVisitor
+    : public AnalysisASTVisitor<ReferencedDeclsVisitor> {
+public:
+  ReferencedDeclsVisitor(ReferencedDecls &Referenced)
+      : Referenced(Referenced) {}
+
+  void TraverseConstructorInits(const CXXConstructorDecl *Ctor) {
+    for (const CXXCtorInitializer *Init : Ctor->inits()) {
+      if (Init->isMemberInitializer()) {
+        Referenced.Fields.insert(Init->getMember());
+      } else if (Init->isIndirectMemberInitializer()) {
+        for (const auto *I : Init->getIndirectMember()->chain())
+          Referenced.Fields.insert(cast<FieldDecl>(I));
+      }
+
+      Expr *InitExpr = Init->getInit();
+
+      // Ensure that any result objects within `InitExpr` (e.g. temporaries)
+      // are also propagated to the prvalues that initialize them.
+      TraverseStmt(InitExpr);
 
-/// Traverses `S` and inserts into `Referenced` any declarations that are
-/// declared in or referenced from sub-statements.
-static void getReferencedDecls(const Stmt &S, ReferencedDecls &Referenced) {
-  for (auto *Child : S.children())
-    if (Child != nullptr)
-      getReferencedDecls(*Child, Referenced);
-  if (const auto *DefaultArg = dyn_cast<CXXDefaultArgExpr>(&S))
-    getReferencedDecls(*DefaultArg->getExpr(), Referenced);
-  if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S))
-    getReferencedDecls(*DefaultInit->getExpr(), Referenced);
-
-  if (auto *DS = dyn_cast<DeclStmt>(&S)) {
-    if (DS->isSingleDecl())
-      getReferencedDecls(*DS->getSingleDecl(), Referenced);
-    else
-      for (auto *D : DS->getDeclGroup())
-        getReferencedDecls(*D, Referenced);
-  } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) {
+      // If this is a `CXXDefaultInitExpr`, also propagate any result objects
+      // within the default expression.
+      if (auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(InitExpr))
+        TraverseStmt(DefaultInit->getExpr());
+    }
+  }
+
+  bool VisitDecl(Decl *D) {
+    insertIfGlobal(*D, Referenced.Globals);
+    insertIfFunction(*D, Referenced.Functions);
+    return true;
+  }
+
+  bool VisitDeclRefExpr(DeclRefExpr *E) {
     insertIfGlobal(*E->getDecl(), Referenced.Globals);
     insertIfFunction(*E->getDecl(), Referenced.Functions);
-  } else if (const auto *C = dyn_cast<CXXMemberCallExpr>(&S)) {
+    return true;
+  }
+
+  bool VisitCXXMemberCallExpr(CXXMemberCallExpr *C) {
     // If this is a method that returns a member variable but does nothing else,
     // model the field of the return value.
     if (MemberExpr *E = getMemberForAccessor(*C))
       if (const auto *FD = dyn_cast<FieldDecl>(E->getMemberDecl()))
         Referenced.Fields.insert(FD);
-  } else if (auto *E = dyn_cast<MemberExpr>(&S)) {
+    return true;
+  }
+
+  bool VisitMemberExpr(MemberExpr *E) {
     // FIXME: should we be using `E->getFoundDecl()`?
     const ValueDecl *VD = E->getMemberDecl();
     insertIfGlobal(*VD, Referenced.Globals);
     insertIfFunction(*VD, Referenced.Functions);
     if (const auto *FD = dyn_cast<FieldDecl>(VD))
       Referenced.Fields.insert(FD);
-  } else if (auto *InitList = dyn_cast<InitListExpr>(&S)) {
+    return true;
+  }
+
+  bool VisitInitListExpr(InitListExpr *InitList) {
     if (InitList->getType()->isRecordType())
       for (const auto *FD : getFieldsForInitListExpr(InitList))
         Referenced.Fields.insert(FD);
-  } else if (auto *ParenInitList = dyn_cast<CXXParenListInitExpr>(&S)) {
+    return true;
+  }
+
+  bool VisitCXXParenListInitExpr(CXXParenListInitExpr *ParenInitList) {
     if (ParenInitList->getType()->isRecordType())
       for (const auto *FD : getFieldsForInitListExpr(ParenInitList))
         Referenced.Fields.insert(FD);
+    return true;
   }
-}
+
+private:
+  ReferencedDecls &Referenced;
+};
 
 ReferencedDecls getReferencedDecls(const FunctionDecl &FD) {
   ReferencedDecls Result;
-  // Look for global variable and field references in the
-  // constructor-initializers.
-  if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(&FD)) {
-    for (const auto *Init : CtorDecl->inits()) {
-      if (Init->isMemberInitializer()) {
-        Result.Fields.insert(Init->getMember());
-      } else if (Init->isIndirectMemberInitializer()) {
-        for (const auto *I : Init->getIndirectMember()->chain())
-          Result.Fields.insert(cast<FieldDecl>(I));
-      }
-      const Expr *E = Init->getInit();
-      assert(E != nullptr);
-      getReferencedDecls(*E, Result);
-    }
-    // Add all fields mentioned in default member initializers.
-    for (const FieldDecl *F : CtorDecl->getParent()->fields())
-      if (const auto *I = F->getInClassInitializer())
-        getReferencedDecls(*I, Result);
-  }
-  getReferencedDecls(*FD.getBody(), Result);
+  ReferencedDeclsVisitor Visitor(Result);
+  Visitor.TraverseStmt(FD.getBody());
+  if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(&FD))
+    Visitor.TraverseConstructorInits(CtorDecl);
 
   return Result;
 }
 
 ReferencedDecls getReferencedDecls(const Stmt &S) {
   ReferencedDecls Result;
-  getReferencedDecls(S, Result);
+  ReferencedDeclsVisitor Visitor(Result);
+  Visitor.TraverseStmt(const_cast<Stmt *>(&S));
   return Result;
 }
 
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index 338a85525b384..0d7967c8b9344 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -297,7 +297,7 @@ namespace {
 // Visitor that builds a map from record prvalues to result objects.
 // For each result object that it encounters, it propagates the storage location
 // of the result object to all record prvalues that can initialize it.
-class ResultObjectVisitor : public RecursiveASTVisitor<ResultObjectVisitor> {
+class ResultObjectVisitor : public AnalysisASTVisitor<ResultObjectVisitor> {
 public:
   // `ResultObjectMap` will be filled with a map from record prvalues to result
   // object. If this visitor will traverse a function that returns a record by
@@ -310,10 +310,6 @@ class ResultObjectVisitor : public RecursiveASTVisitor<ResultObjectVisitor> {
       : ResultObjectMap(ResultObjectMap),
         LocForRecordReturnVal(LocForRecordReturnVal), DACtx(DACtx) {}
 
-  bool shouldVisitImplicitCode() { return true; }
-
-  bool shouldVisitLambdaBody() const { return false; }
-
   // Traverse all member and base initializers of `Ctor`. This function is not
   // called by `RecursiveASTVisitor`; it should be called manually if we are
   // analyzing a constructor. `ThisPointeeLoc` is the storage location that
@@ -342,37 +338,6 @@ class ResultObjectVisitor : public RecursiveASTVisitor<ResultObjectVisitor> {
     }
   }
 
-  bool TraverseDecl(Decl *D) {
-    // Don't traverse nested record or function declarations.
-    // - We won't be analyzing code contained in these anyway
-    // - We don't model fields that are used only in these nested declaration,
-    //   so trying to propagate a result object to initializers of such fields
-    //   would cause an error.
-    if (isa_and_nonnull<RecordDecl>(D) || isa_and_nonnull<FunctionDecl>(D))
-      return true;
-
-    return RecursiveASTVisitor<ResultObjectVisitor>::TraverseDecl(D);
-  }
-
-  // Don't traverse expressions in unevaluated contexts, as we don't model
-  // fields that are only used in these.
-  // Note: The operand of the `noexcept` operator is an unevaluated operand, but
-  // nevertheless it appears in the Clang CFG, so we don't exclude it here.
-  bool TraverseDecltypeTypeLoc(DecltypeTypeLoc) { return true; }
-  bool TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc) { return true; }
-  bool TraverseCXXTypeidExpr(CXXTypeidExpr *) { return true; }
-  bool TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *) {
-    return true;
-  }
-
-  bool TraverseBindingDecl(BindingDecl *BD) {
-    // `RecursiveASTVisitor` doesn't traverse holding variables for
-    // `BindingDecl`s by itself, so we need to tell it to.
-    if (VarDecl *HoldingVar = BD->getHoldingVar())
-      TraverseDecl(HoldingVar);
-    return RecursiveASTVisitor<ResultObjectVisitor>::TraverseBindingDecl(BD);
-  }
-
   bool VisitVarDecl(VarDecl *VD) {
     if (VD->getType()->isRecordType() && VD->hasInit())
       PropagateResultObject(



More information about the cfe-commits mailing list