[clang] [C++20] [Modules] Introduce a tool 'clang-named-modules-querier' and two plugins 'ClangGetUsedFilesFromModulesPlugin' and 'ClangGetDeclsInModulesPlugin' (PR #72956)

Chuanqi Xu via cfe-commits cfe-commits at lists.llvm.org
Tue Nov 21 00:15:37 PST 2023


https://github.com/ChuanqiXu9 updated https://github.com/llvm/llvm-project/pull/72956

>From 05453bc0da214ad69ab94d901c997c61fae86ab6 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Fri, 17 Nov 2023 14:57:02 +0800
Subject: [PATCH 1/2] [C++20] [Modules] Introduce a tool
 'clang-named-modules-querier' and two plugins
 'ClangGetUsedFilesFromModulesPlugin' and 'ClangGetDeclsInModulesPlugin'

This patch introduces a tool 'clang-named-modules-querier' and two
plugins 'ClangGetUsedFilesFromModulesPlugin' and
'ClangGetDeclsInModulesPlugin' to help build systems avoid
unnecessary compilations when using modules.

After building clang, we should be able to see
`clang-named-modules-querier` in the `bin` directory, and
`ClangGetUsedFilesFromModulesPlugin.so` and
`ClangGetDeclsInModulesPlugin.so` in the `lib` directory.
---
 clang/include/clang/Serialization/ASTReader.h |   4 +
 clang/lib/Serialization/ASTReader.cpp         |  25 +-
 clang/tools/CMakeLists.txt                    |   2 +-
 .../CMakeLists.txt                            |  27 +++
 .../ClangNamedModulesQuerier.cpp              | 214 ++++++++++++++++++
 .../GetDeclsInfoToJson.h                      |  48 ++++
 .../GetUsedDeclActionPlugin.cpp               | 169 ++++++++++++++
 .../GetUsedFilesFromModulesPlugin.cpp         | 131 +++++++++++
 8 files changed, 613 insertions(+), 7 deletions(-)
 create mode 100644 clang/tools/clang-named-modules-querier/CMakeLists.txt
 create mode 100644 clang/tools/clang-named-modules-querier/ClangNamedModulesQuerier.cpp
 create mode 100644 clang/tools/clang-named-modules-querier/GetDeclsInfoToJson.h
 create mode 100644 clang/tools/clang-named-modules-querier/GetUsedDeclActionPlugin.cpp
 create mode 100644 clang/tools/clang-named-modules-querier/GetUsedFilesFromModulesPlugin.cpp

diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index 7eefdca6815cdad..9f028e59b9445d4 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -1981,6 +1981,10 @@ class ASTReader
   /// lookup table as unmaterialized references.
   bool FindExternalVisibleDeclsByName(const DeclContext *DC,
                                       DeclarationName Name) override;
+  /// Return false if Name is empty or the DeclContext doesn't come from the reader.
+  bool FindVisibleDeclsByName(const DeclContext *DC, DeclarationName Name,
+                              SmallVectorImpl<NamedDecl*> &Decls);
+
 
   /// Read all of the declarations lexically stored in a
   /// declaration context.
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 42b48d230af7a97..141df3beffa0ce8 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -7928,11 +7928,9 @@ void ASTReader::FindFileRegionDecls(FileID File,
     Decls.push_back(GetDecl(getGlobalDeclID(*DInfo.Mod, *DIt)));
 }
 
-bool
-ASTReader::FindExternalVisibleDeclsByName(const DeclContext *DC,
-                                          DeclarationName Name) {
-  assert(DC->hasExternalVisibleStorage() && DC == DC->getPrimaryContext() &&
-         "DeclContext has no visible decls in storage");
+bool ASTReader::FindVisibleDeclsByName(const DeclContext *DC,
+                                       DeclarationName Name,
+                                       SmallVectorImpl<NamedDecl*> &Decls) {
   if (!Name)
     return false;
 
@@ -7943,7 +7941,6 @@ ASTReader::FindExternalVisibleDeclsByName(const DeclContext *DC,
   Deserializing LookupResults(this);
 
   // Load the list of declarations.
-  SmallVector<NamedDecl *, 64> Decls;
   llvm::SmallPtrSet<NamedDecl *, 8> Found;
   for (DeclID ID : It->second.Table.find(Name)) {
     NamedDecl *ND = cast<NamedDecl>(GetDecl(ID));
@@ -7951,6 +7948,22 @@ ASTReader::FindExternalVisibleDeclsByName(const DeclContext *DC,
       Decls.push_back(ND);
   }
 
+  return true;
+}
+
+bool
+ASTReader::FindExternalVisibleDeclsByName(const DeclContext *DC,
+                                          DeclarationName Name) {
+  assert(DC->hasExternalVisibleStorage() && DC == DC->getPrimaryContext() &&
+         "DeclContext has no visible decls in storage");
+
+  Deserializing LookupResults(this);
+
+  // Load the list of declarations.
+  SmallVector<NamedDecl *, 64> Decls;
+  if (!FindVisibleDeclsByName(DC, Name, Decls))
+    return false;
+
   ++NumVisibleDeclContextsRead;
   SetExternalVisibleDeclsForName(DC, Name, Decls);
   return !Decls.empty();
diff --git a/clang/tools/CMakeLists.txt b/clang/tools/CMakeLists.txt
index f60db6ef0ba3454..d55b79e51dfa0c5 100644
--- a/clang/tools/CMakeLists.txt
+++ b/clang/tools/CMakeLists.txt
@@ -17,7 +17,7 @@ if(HAVE_CLANG_REPL_SUPPORT)
 endif()
 
 add_clang_subdirectory(c-index-test)
-
+add_clang_subdirectory(clang-named-modules-querier)
 add_clang_subdirectory(clang-rename)
 add_clang_subdirectory(clang-refactor)
 # For MinGW we only enable shared library if LLVM_LINK_LLVM_DYLIB=ON.
diff --git a/clang/tools/clang-named-modules-querier/CMakeLists.txt b/clang/tools/clang-named-modules-querier/CMakeLists.txt
new file mode 100644
index 000000000000000..84e57b904129c78
--- /dev/null
+++ b/clang/tools/clang-named-modules-querier/CMakeLists.txt
@@ -0,0 +1,27 @@
+set(LLVM_LINK_COMPONENTS
+  Support
+  )
+
+add_clang_tool(clang-named-modules-querier
+  ClangNamedModulesQuerier.cpp
+  PARTIAL_SOURCES_INTENDED
+  DEPENDS
+  GENERATE_DRIVER
+  )
+
+set(CLANG_NAMED_MODULES_QUERIER
+  clangAST
+  clangBasic
+  clangFrontend
+  clangSerialization
+  clangTooling
+  )
+
+clang_target_link_libraries(clang-named-modules-querier
+  PRIVATE
+  ${CLANG_NAMED_MODULES_QUERIER}
+  )
+
+add_llvm_library(ClangGetDeclsInModulesPlugin PARTIAL_SOURCES_INTENDED MODULE GetUsedDeclActionPlugin.cpp PLUGIN_TOOL clang)
+
+add_llvm_library(ClangGetUsedFilesFromModulesPlugin PARTIAL_SOURCES_INTENDED MODULE GetUsedFilesFromModulesPlugin.cpp PLUGIN_TOOL clang)
diff --git a/clang/tools/clang-named-modules-querier/ClangNamedModulesQuerier.cpp b/clang/tools/clang-named-modules-querier/ClangNamedModulesQuerier.cpp
new file mode 100644
index 000000000000000..13503578c2307b3
--- /dev/null
+++ b/clang/tools/clang-named-modules-querier/ClangNamedModulesQuerier.cpp
@@ -0,0 +1,214 @@
+//===- ClangNamedModulesQuerier.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "GetDeclsInfoToJson.h"
+
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/AST/ASTConsumer.h"
+#include "clang/Frontend/FrontendAction.h"
+#include "clang/Serialization/ASTDeserializationListener.h"
+#include "clang/Serialization/ASTReader.h"
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/LLVMDriver.h"
+#include "llvm/Support/JSON.h"
+
+using namespace clang;
+
+class DeclsQueryAction : public ASTFrontendAction {
+  std::vector<std::string> QueryingDeclNames;
+  llvm::json::Array JsonOutput;
+
+public:
+  DeclsQueryAction(std::vector<std::string> &&QueryingDeclNames) :
+    QueryingDeclNames(QueryingDeclNames) {} 
+
+  bool BeginInvocation(CompilerInstance &CI) override {
+    CI.getHeaderSearchOpts().ModuleFormat = "raw";
+    return true;
+  }
+
+  DeclContext *getDeclContextByName(ASTReader *Reader, StringRef Name);
+  std::optional<SmallVector<NamedDecl *>> getDeclsByName(ASTReader *Reader, StringRef Name);
+
+  std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
+                                                 StringRef InFile) override {
+    return std::make_unique<ASTConsumer>();
+  }
+
+  void QueryDecls(ASTReader *Reader, StringRef Name);
+
+  void ExecuteAction() override {
+    assert(isCurrentFileAST() && "dumping non-AST?");
+
+    ASTReader *Reader = getCurrentASTUnit().getASTReader().get();
+    serialization::ModuleFile &MF = Reader->getModuleManager().getPrimaryModule();
+    if (!MF.StandardCXXModule) {
+      llvm::errs() << "We should only consider standard C++20 Modules.\n";
+      return;
+    }
+
+    for (auto &Name : QueryingDeclNames)
+      QueryDecls(Reader, Name);
+
+    CompilerInstance &CI = getCompilerInstance();
+    std::unique_ptr<raw_pwrite_stream> OS = CI.createDefaultOutputFile(/*Binary=*/false);
+    if (!OS) {
+      llvm::errs() << "Failed to create output file\n";
+      return;
+    }
+
+    using namespace llvm::json;
+    *OS << llvm::formatv("{0:2}\n", Value(std::move(JsonOutput)));
+  }
+};
+
+static DeclContext *getDeclContext(NamedDecl *ND) {
+  if (auto *CTD = dyn_cast<ClassTemplateDecl>(ND))
+    return CTD->getTemplatedDecl();
+
+  return dyn_cast<DeclContext>(ND);
+}
+
+static DeclContext *getDeclContextFor(const SmallVector<NamedDecl *> &DCCandidates) {
+  DeclContext *Result = nullptr;
+
+  for (auto *ND : DCCandidates) {
+    auto *DC = getDeclContext(ND);
+    if (!DC)
+      continue;
+
+    if (!Result)
+      Result = DC->getPrimaryContext();
+    else if (Result == DC->getPrimaryContext())
+      continue;
+    else {
+      llvm::errs() << "Found multiple decl context: \n";
+      cast<Decl>(Result)->dump();
+      cast<Decl>(DC)->dump();
+    }
+  }
+
+  return Result;
+}
+
+DeclContext *DeclsQueryAction::getDeclContextByName(ASTReader *Reader, StringRef Name) {
+  if (Name.empty())
+    return Reader->getContext().getTranslationUnitDecl();
+
+  std::optional<SmallVector<NamedDecl *>> DCCandidates = getDeclsByName(Reader, Name);
+  if (!DCCandidates || DCCandidates->empty())
+    return nullptr;
+
+  return getDeclContextFor(*DCCandidates);
+}
+
+std::optional<SmallVector<NamedDecl *>> DeclsQueryAction::getDeclsByName(ASTReader *Reader, StringRef Name) {
+  if (Name.endswith("::"))
+    return std::nullopt;
+
+  auto [ParentName, UnqualifiedName] = Name.rsplit("::");
+
+  // This implies that "::" is not in the Name.
+  if (ParentName == Name) {
+    UnqualifiedName = Name;
+    ParentName = StringRef();
+  }
+
+  DeclContext *DC = getDeclContextByName(Reader, ParentName);
+  if (!DC)
+    return std::nullopt;
+
+  IdentifierInfo *II = Reader->get(UnqualifiedName);
+
+  if (!II)
+    return std::nullopt;
+
+  llvm::SmallVector<NamedDecl *> Decls;
+  Reader->FindVisibleDeclsByName(DC, DeclarationName(II), Decls);
+
+  // TODO: Should we filter here?
+  return Decls;
+}
+
+void DeclsQueryAction::QueryDecls(ASTReader *Reader, StringRef Name) {
+  using namespace llvm::json;
+
+  std::optional<SmallVector<NamedDecl *>> Decls = getDeclsByName(Reader, Name);
+  if (!Decls) {
+    JsonOutput.push_back(Object{{Name, "invalid name"}});
+    return;
+  }
+
+  SourceManager &SMgr = Reader->getSourceManager();
+
+  // TODO: Handle overloads here.
+  for (NamedDecl *ND : *Decls)
+    JsonOutput.push_back(getDeclInJson(ND, SMgr));
+}
+
+// TODO: Print --help information
+// TODO: Add -resource-dir automatically
+int clang_named_modules_querier_main(int argc, char **argv, const llvm::ToolContext &) {
+  IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
+    CompilerInstance::createDiagnostics(new DiagnosticOptions());
+  CreateInvocationOptions CIOpts;
+  CIOpts.Diags = Diags;
+  CIOpts.VFS = llvm::vfs::createPhysicalFileSystem();
+
+  llvm::ArrayRef<const char *> Args(argv, argv + argc);
+  if (llvm::find_if(Args, [](auto &&Arg) {
+    return std::strcmp(Arg, "--help") == 0;
+  }) != Args.end()) {
+    llvm::outs() << R"cpp(
+To query the decls from module files.
+
+Syntax:
+
+  clang-named-modules-querier module-file - <decl-names-to-be-queried>...
+
+For example:
+
+  clang-named-modules-querier a.pcm -- a nn::a Templ::get
+  
+The unqualified name are treated as if it is under the global namespace.
+
+The output information contains kind of the declaration, source file name,
+line and col number and the hash value of declaration.
+    )cpp";
+    return 0;
+  }
+
+  auto DashDashIter = llvm::find_if(Args, [](auto &&V){
+    return std::strcmp(V, "--") == 0;
+  });
+  
+  std::vector<std::string> QueryingDeclNames;
+  auto Iter = DashDashIter;
+  // Don't record "--".
+  if (Iter != Args.end())
+    Iter++;
+  while (Iter != Args.end())
+    QueryingDeclNames.push_back(std::string(*Iter++));
+
+  if (QueryingDeclNames.empty()) {
+    llvm::errs() << "We need pass the names that need to be queried after '--'";
+    return 0;
+  }
+
+  std::shared_ptr<CompilerInvocation> Invocation =
+    createInvocation(llvm::ArrayRef<const char *>(argv, DashDashIter), CIOpts);
+
+  CompilerInstance Instance;
+  Instance.setDiagnostics(Diags.get());
+  Instance.setInvocation(Invocation);
+  DeclsQueryAction Action(std::move(QueryingDeclNames));
+  Instance.ExecuteAction(Action);
+
+  return 0;
+}
diff --git a/clang/tools/clang-named-modules-querier/GetDeclsInfoToJson.h b/clang/tools/clang-named-modules-querier/GetDeclsInfoToJson.h
new file mode 100644
index 000000000000000..ff250a87c0e1592
--- /dev/null
+++ b/clang/tools/clang-named-modules-querier/GetDeclsInfoToJson.h
@@ -0,0 +1,48 @@
+//===- GetDeclsInfoToJson.h -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_TOOLS_CLANG_NAMED_MODULES_QUERIER_GET_DECLS_INFO_TO_JSON_H
+#define CLANG_TOOLS_CLANG_NAMED_MODULES_QUERIER_GET_DECLS_INFO_TO_JSON_H
+
+#include "clang/AST/Decl.h"
+#include "clang/AST/ODRHash.h"
+#include "clang/Basic/SourceManager.h"
+#include "llvm/Support/JSON.h"
+
+namespace clang {
+inline unsigned getHashValue(const NamedDecl *ND) {
+  ODRHash Hasher;
+
+  if (auto *FD = dyn_cast<FunctionDecl>(ND))
+    Hasher.AddFunctionDecl(FD);
+  else if (auto *ED = dyn_cast<EnumDecl>(ND))
+    Hasher.AddEnumDecl(ED);
+  else if (auto *CRD = dyn_cast<CXXRecordDecl>(ND))
+    Hasher.AddCXXRecordDecl(CRD);
+  else {
+    Hasher.AddDecl(ND);
+    Hasher.AddSubDecl(ND);
+  }
+
+  return Hasher.CalculateHash();
+}
+
+inline llvm::json::Object getDeclInJson(const NamedDecl *ND, SourceManager &SMgr) {
+  llvm::json::Object DeclObject;
+  DeclObject.try_emplace("kind", ND->getDeclKindName());
+  FullSourceLoc FSL(ND->getLocation(), SMgr);
+  const FileEntry *FE = SMgr.getFileEntryForID(FSL.getFileID());
+  DeclObject.try_emplace("source File Name", FE ? FE->getName() : "Unknown Source File");
+  DeclObject.try_emplace("line", FSL.getSpellingLineNumber());
+  DeclObject.try_emplace("col", FSL.getSpellingColumnNumber());
+  DeclObject.try_emplace("Hash", getHashValue(ND));
+  return llvm::json::Object({{ND->getQualifiedNameAsString(), std::move(DeclObject)}});
+}
+}
+
+#endif
diff --git a/clang/tools/clang-named-modules-querier/GetUsedDeclActionPlugin.cpp b/clang/tools/clang-named-modules-querier/GetUsedDeclActionPlugin.cpp
new file mode 100644
index 000000000000000..a66eecbb9423324
--- /dev/null
+++ b/clang/tools/clang-named-modules-querier/GetUsedDeclActionPlugin.cpp
@@ -0,0 +1,169 @@
+//===- GetUsedDeclActionPlugin.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "GetDeclsInfoToJson.h"
+
+#include "clang/Frontend/FrontendPluginRegistry.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/AST/ASTConsumer.h"
+#include "clang/Frontend/FrontendAction.h"
+#include "clang/Serialization/ASTDeserializationListener.h"
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/Path.h"
+
+namespace clang {
+
+class DeclsQuerier : public ASTDeserializationListener {
+public:
+  void DeclRead(serialization::DeclID ID, const Decl *D) override {
+    // We only cares about function decls, var decls, tag decls (class, struct, enum, union).
+    if (!isa<NamedDecl>(D))
+      return;
+    
+    // We only records the template declaration if the declaration is placed in templates.
+    if (auto *FD = dyn_cast<FunctionDecl>(D); FD && FD->getDescribedFunctionTemplate())
+      return;
+
+    if (auto *VD = dyn_cast<VarDecl>(D); VD && VD->getDescribedVarTemplate())
+      return;
+
+    if (auto *CRD = dyn_cast<CXXRecordDecl>(D); CRD && CRD->getDescribedClassTemplate())
+      return;
+
+    if (isa<TemplateTypeParmDecl, NonTypeTemplateParmDecl, TemplateTemplateParmDecl>(D))
+      return;
+        
+    // We don't care about declarations in function scope. 
+    if (isa<FunctionDecl>(D->getDeclContext()))
+      return;
+    
+    // Skip implicit declarations.
+    if (D->isImplicit())
+      return;
+
+    Module *M = D->getOwningModule();
+    // We only care about C++20 named modules.
+    if (!M || !M->getTopLevelModule()->isNamedModule())
+      return;
+
+    StringRef ModuleName = M->Name;
+    auto Iter = Names.find(ModuleName);
+    if (Iter == Names.end())
+      Iter = Names.try_emplace(ModuleName, std::vector<const NamedDecl*>()).first;
+    
+    Iter->second.push_back(cast<NamedDecl>(D));
+  }
+
+  llvm::StringMap<std::vector<const NamedDecl *>> Names;
+};
+
+class DeclsQuerierConsumer : public ASTConsumer {
+  CompilerInstance &CI;
+  StringRef InFile;
+  std::string OutputFile;
+  DeclsQuerier Querier;
+  
+public:
+  DeclsQuerierConsumer(CompilerInstance &CI, StringRef InFile, StringRef OutputFile)
+    : CI(CI), InFile(InFile), OutputFile(OutputFile) {}
+
+  ASTDeserializationListener *GetASTDeserializationListener() override {
+    return &Querier;
+  }
+
+  std::unique_ptr<raw_pwrite_stream> getOutputFile() {
+    if (OutputFile.empty()) {
+      llvm::SmallString<256> Path(InFile);
+      llvm::sys::path::replace_extension(Path, "used_external_decls.json");
+      OutputFile = (std::string)Path;
+    }
+
+    std::error_code EC;
+    auto OS = std::make_unique<llvm::raw_fd_ostream>(OutputFile, EC);
+    if (EC)
+      return nullptr;
+    
+    return OS;
+  }
+
+  void HandleTranslationUnit(ASTContext &Ctx) override {
+    std::unique_ptr<raw_pwrite_stream> OS = getOutputFile();
+    if (!OS)
+      return;
+
+    using namespace llvm::json;
+
+    Array Modules;
+
+    for (auto &Iter : Querier.Names) {
+      Object ModulesInfo;
+
+      StringRef ModuleName = Iter.first();
+      ModulesInfo.try_emplace("module", ModuleName);
+
+      std::vector<const NamedDecl *> Decls(Iter.second);
+      Array DeclsInJson;
+      for (auto *ND : Decls)
+        DeclsInJson.push_back(getDeclInJson(ND, Ctx.getSourceManager()));
+
+      ModulesInfo.try_emplace("decls", std::move(DeclsInJson));
+      Modules.push_back(std::move(ModulesInfo));
+    }
+
+    *OS << llvm::formatv("{0:2}\n", Value(std::move(Modules)));
+  }
+};
+
+void PrintHelp();
+
+class DeclsQueryAction : public PluginASTAction {
+  std::string OutputFile;
+
+public:
+  DeclsQueryAction(StringRef OutputFile) : OutputFile(OutputFile) {}
+  DeclsQueryAction() = default;
+
+
+  std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
+                                                 StringRef InFile) override {
+    return std::make_unique<DeclsQuerierConsumer>(CI, InFile, OutputFile);
+  }
+
+  ActionType getActionType() override { return AddAfterMainAction; }
+
+  bool ParseArgs(const CompilerInstance &CI,
+                 const std::vector<std::string> &Args) override {
+    for (auto &Arg : Args) {
+      if (StringRef(Arg).startswith("output=")) {
+        OutputFile = StringRef(Arg).split('=').second;
+      } else {
+        PrintHelp();
+        return false;
+      }
+    }
+
+    return true;
+  }
+};
+
+void PrintHelp() {
+  llvm::outs() << R"cpp(
+To get used decls from modules.
+
+The output is printed to the std output by default when use it as a standalone tool.
+
+If you're using plugin, use -fplugin-arg-decls_query_from_modules-output=<output-file>
+to specify the output path of used decls.
+  )cpp";
+}
+}
+
+static clang::FrontendPluginRegistry::Add<clang::DeclsQueryAction>
+X("decls_query_from_modules", "query used decls from modules");
diff --git a/clang/tools/clang-named-modules-querier/GetUsedFilesFromModulesPlugin.cpp b/clang/tools/clang-named-modules-querier/GetUsedFilesFromModulesPlugin.cpp
new file mode 100644
index 000000000000000..bf244861e2bfb83
--- /dev/null
+++ b/clang/tools/clang-named-modules-querier/GetUsedFilesFromModulesPlugin.cpp
@@ -0,0 +1,131 @@
+//===- GetUsedFilesFromModulesPlugin.cpp ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Frontend/FrontendPluginRegistry.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/AST/ASTConsumer.h"
+#include "clang/Frontend/FrontendAction.h"
+#include "clang/Serialization/ASTDeserializationListener.h"
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/Path.h"
+
+namespace clang {
+
+class DeclsQuerier : public ASTDeserializationListener {
+public:
+  void DeclRead(serialization::DeclID ID, const Decl *D) override {
+    // Filter Decl's to avoid store too many informations.
+    if (!D->getLexicalDeclContext())
+      return;
+
+    if (!isa<FunctionDecl>(D) && 
+        !D->getLexicalDeclContext()->getRedeclContext()->isFileContext())
+      return;
+
+    ASTContext &Ctx = D->getASTContext();
+    SourceManager &SMgr = Ctx.getSourceManager();
+    FullSourceLoc FSL(D->getLocation(), SMgr);
+    if (!FSL.isValid())
+      return;
+
+    FileIDSet.insert(FSL.getFileID());
+  }
+
+  llvm::DenseSet<FileID> FileIDSet;
+};
+
+class DeclsQuerierConsumer : public ASTConsumer {
+  CompilerInstance &CI;
+  StringRef InFile;
+  std::string OutputFile;
+  DeclsQuerier Querier;
+  
+public:
+  DeclsQuerierConsumer(CompilerInstance &CI, StringRef InFile, StringRef OutputFile)
+    : CI(CI), InFile(InFile), OutputFile(OutputFile) {}
+
+  ASTDeserializationListener *GetASTDeserializationListener() override {
+    return &Querier;
+  }
+
+  std::unique_ptr<raw_pwrite_stream> getOutputFile() {
+    if (OutputFile.empty()) {
+      llvm::SmallString<256> Path(InFile);
+      llvm::sys::path::replace_extension(Path, "used_files.txt");
+      OutputFile = (std::string)Path;
+    }
+
+    std::error_code EC;
+    auto OS = std::make_unique<llvm::raw_fd_ostream>(OutputFile, EC);
+    if (EC)
+      return nullptr;
+    
+    return OS;
+  }
+
+  void HandleTranslationUnit(ASTContext &Ctx) override {
+    std::unique_ptr<raw_pwrite_stream> OS = getOutputFile();
+    if (!OS)
+      return;
+
+    auto &SMgr = Ctx.getSourceManager();
+    
+    for (const auto &FID : Querier.FileIDSet) {
+      const FileEntry *FE = SMgr.getFileEntryForID(FID);
+      if (!FE)
+        continue;
+      
+      *OS << FE->getName() << "\n";
+    }
+  }
+};
+
+void PrintHelp();
+
+class DeclsQueryAction : public PluginASTAction {
+  std::string OutputFile;
+
+public:
+  DeclsQueryAction(StringRef OutputFile) : OutputFile(OutputFile) {}
+  DeclsQueryAction() = default;
+
+  std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
+                                                 StringRef InFile) override {
+    return std::make_unique<DeclsQuerierConsumer>(CI, InFile, OutputFile);
+  }
+
+  ActionType getActionType() override { return AddAfterMainAction; }
+
+  bool ParseArgs(const CompilerInstance &CI,
+                 const std::vector<std::string> &Args) override {
+    for (auto &Arg : Args) {
+      if (StringRef(Arg).startswith("output=")) {
+        OutputFile = StringRef(Arg).split('=').second.str();
+      } else {
+        PrintHelp();
+        return false;
+      }
+    }
+
+    return true;
+  }
+};
+
+void PrintHelp() {
+  llvm::outs() << R"cpp(
+To get used decls from modules.
+
+If you're using plugin, use -fplugin-arg-get_used_files_from_modules-output=<output-file>
+to specify the output path of used files.
+  )cpp";
+}
+}
+
+static clang::FrontendPluginRegistry::Add<clang::DeclsQueryAction>
+X("get_used_files_from_modules", "get used files from modules");

>From 85afc3a4aaa2d17b7cda318b75b874157f5060eb Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Tue, 21 Nov 2023 15:54:33 +0800
Subject: [PATCH 2/2] Don't add more decls when we compute the hash value

---
 .../CMakeLists.txt                            |  2 +-
 .../GetDeclsInfoToJson.h                      |  9 +++++----
 ....cpp => GetUsedDeclsFromModulesPlugin.cpp} | 19 +++++++++++++++----
 .../GetUsedFilesFromModulesPlugin.cpp         |  6 ++++--
 4 files changed, 25 insertions(+), 11 deletions(-)
 rename clang/tools/clang-named-modules-querier/{GetUsedDeclActionPlugin.cpp => GetUsedDeclsFromModulesPlugin.cpp} (89%)

diff --git a/clang/tools/clang-named-modules-querier/CMakeLists.txt b/clang/tools/clang-named-modules-querier/CMakeLists.txt
index 84e57b904129c78..a963e983a66769e 100644
--- a/clang/tools/clang-named-modules-querier/CMakeLists.txt
+++ b/clang/tools/clang-named-modules-querier/CMakeLists.txt
@@ -22,6 +22,6 @@ clang_target_link_libraries(clang-named-modules-querier
   ${CLANG_NAMED_MODULES_QUERIER}
   )
 
-add_llvm_library(ClangGetDeclsInModulesPlugin PARTIAL_SOURCES_INTENDED MODULE GetUsedDeclActionPlugin.cpp PLUGIN_TOOL clang)
+add_llvm_library(ClangGetUsedDeclsFromModulesPlugin PARTIAL_SOURCES_INTENDED MODULE GetUsedDeclsFromModulesPlugin.cpp PLUGIN_TOOL clang)
 
 add_llvm_library(ClangGetUsedFilesFromModulesPlugin PARTIAL_SOURCES_INTENDED MODULE GetUsedFilesFromModulesPlugin.cpp PLUGIN_TOOL clang)
diff --git a/clang/tools/clang-named-modules-querier/GetDeclsInfoToJson.h b/clang/tools/clang-named-modules-querier/GetDeclsInfoToJson.h
index ff250a87c0e1592..ed28ef16ccbc157 100644
--- a/clang/tools/clang-named-modules-querier/GetDeclsInfoToJson.h
+++ b/clang/tools/clang-named-modules-querier/GetDeclsInfoToJson.h
@@ -10,6 +10,7 @@
 #define CLANG_TOOLS_CLANG_NAMED_MODULES_QUERIER_GET_DECLS_INFO_TO_JSON_H
 
 #include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
 #include "clang/AST/ODRHash.h"
 #include "clang/Basic/SourceManager.h"
 #include "llvm/Support/JSON.h"
@@ -19,11 +20,11 @@ inline unsigned getHashValue(const NamedDecl *ND) {
   ODRHash Hasher;
 
   if (auto *FD = dyn_cast<FunctionDecl>(ND))
-    Hasher.AddFunctionDecl(FD);
+    return FD->getODRHash();
   else if (auto *ED = dyn_cast<EnumDecl>(ND))
-    Hasher.AddEnumDecl(ED);
-  else if (auto *CRD = dyn_cast<CXXRecordDecl>(ND))
-    Hasher.AddCXXRecordDecl(CRD);
+    return const_cast<EnumDecl*>(ED)->getODRHash();
+  else if (auto *CRD = dyn_cast<CXXRecordDecl>(ND); CRD && CRD->hasDefinition())
+    return CRD->getODRHash();
   else {
     Hasher.AddDecl(ND);
     Hasher.AddSubDecl(ND);
diff --git a/clang/tools/clang-named-modules-querier/GetUsedDeclActionPlugin.cpp b/clang/tools/clang-named-modules-querier/GetUsedDeclsFromModulesPlugin.cpp
similarity index 89%
rename from clang/tools/clang-named-modules-querier/GetUsedDeclActionPlugin.cpp
rename to clang/tools/clang-named-modules-querier/GetUsedDeclsFromModulesPlugin.cpp
index a66eecbb9423324..43613ed11d9bbc7 100644
--- a/clang/tools/clang-named-modules-querier/GetUsedDeclActionPlugin.cpp
+++ b/clang/tools/clang-named-modules-querier/GetUsedDeclsFromModulesPlugin.cpp
@@ -1,4 +1,4 @@
-//===- GetUsedDeclActionPlugin.cpp -----------------------------------------===//
+//===- GetUsedDeclsFromModulesPlugin.cpp ----------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -23,10 +23,17 @@ namespace clang {
 class DeclsQuerier : public ASTDeserializationListener {
 public:
   void DeclRead(serialization::DeclID ID, const Decl *D) override {
+    if (Stopped)
+      return;
+
     // We only cares about function decls, var decls, tag decls (class, struct, enum, union).
     if (!isa<NamedDecl>(D))
       return;
     
+    // Filter decls to avoid storing too much information.
+    if (!D->getLexicalDeclContext())
+      return;
+
     // We only records the template declaration if the declaration is placed in templates.
     if (auto *FD = dyn_cast<FunctionDecl>(D); FD && FD->getDescribedFunctionTemplate())
       return;
@@ -62,6 +69,7 @@ class DeclsQuerier : public ASTDeserializationListener {
   }
 
   llvm::StringMap<std::vector<const NamedDecl *>> Names;
+  bool Stopped = false;
 };
 
 class DeclsQuerierConsumer : public ASTConsumer {
@@ -97,6 +105,10 @@ class DeclsQuerierConsumer : public ASTConsumer {
     std::unique_ptr<raw_pwrite_stream> OS = getOutputFile();
     if (!OS)
       return;
+    
+    /// Otherwise, computing the ODR hash may cause more decls to be
+    /// deserialized.
+    Querier.Stopped = true;
 
     using namespace llvm::json;
 
@@ -130,7 +142,6 @@ class DeclsQueryAction : public PluginASTAction {
   DeclsQueryAction(StringRef OutputFile) : OutputFile(OutputFile) {}
   DeclsQueryAction() = default;
 
-
   std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
                                                  StringRef InFile) override {
     return std::make_unique<DeclsQuerierConsumer>(CI, InFile, OutputFile);
@@ -159,11 +170,11 @@ To get used decls from modules.
 
 The output is printed to the std output by default when use it as a standalone tool.
 
-If you're using plugin, use -fplugin-arg-decls_query_from_modules-output=<output-file>
+If you're using plugin, use -fplugin-arg-get_used_decls_from_modules-output=<output-file>
 to specify the output path of used decls.
   )cpp";
 }
 }
 
 static clang::FrontendPluginRegistry::Add<clang::DeclsQueryAction>
-X("decls_query_from_modules", "query used decls from modules");
+X("get_used_decls_from_modules", "query used decls from modules");
diff --git a/clang/tools/clang-named-modules-querier/GetUsedFilesFromModulesPlugin.cpp b/clang/tools/clang-named-modules-querier/GetUsedFilesFromModulesPlugin.cpp
index bf244861e2bfb83..d1c132f2e6a6aa0 100644
--- a/clang/tools/clang-named-modules-querier/GetUsedFilesFromModulesPlugin.cpp
+++ b/clang/tools/clang-named-modules-querier/GetUsedFilesFromModulesPlugin.cpp
@@ -20,11 +20,13 @@ namespace clang {
 class DeclsQuerier : public ASTDeserializationListener {
 public:
   void DeclRead(serialization::DeclID ID, const Decl *D) override {
-    // Filter Decl's to avoid store too many informations.
+    if (!isa<NamedDecl>(D))
+      return;
+
     if (!D->getLexicalDeclContext())
       return;
 
-    if (!isa<FunctionDecl>(D) && 
+    if (!isa<FunctionDecl>(D) &&
         !D->getLexicalDeclContext()->getRedeclContext()->isFileContext())
       return;
 



More information about the cfe-commits mailing list