[clang-tools-extra] [clang-doc] Improve performance by adding a short circuit (PR #96809)

via cfe-commits cfe-commits at lists.llvm.org
Wed Jun 26 11:38:38 PDT 2024


https://github.com/PeterChou1 created https://github.com/llvm/llvm-project/pull/96809

This patch adds a short circuit which address performance problem referenced in 
issue: https://github.com/llvm/llvm-project/issues/96570

>From my local testing it cut the runtime of generating LLVM docs from 10 hours to 30 mins



>From 7733243a7b9660ce2bcbd43f6219314fcd1e4a73 Mon Sep 17 00:00:00 2001
From: PeterChou1 <peter.chou at mail.utoronto.ca>
Date: Wed, 26 Jun 2024 14:20:03 -0400
Subject: [PATCH 1/2] [clang-doc] add short circuit in mapper

---
 clang-tools-extra/clang-doc/Mapper.cpp        | 21 +++++++++++++++++++
 clang-tools-extra/clang-doc/Mapper.h          |  3 +++
 .../clang-doc/Representation.cpp              |  8 +++----
 clang-tools-extra/clang-doc/Representation.h  |  7 +++++--
 .../clang-doc/tool/ClangDocMain.cpp           | 13 ++++++------
 5 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/clang-tools-extra/clang-doc/Mapper.cpp b/clang-tools-extra/clang-doc/Mapper.cpp
index bb8b7952980ac..ba6311e85a2ee 100644
--- a/clang-tools-extra/clang-doc/Mapper.cpp
+++ b/clang-tools-extra/clang-doc/Mapper.cpp
@@ -17,6 +17,11 @@
 namespace clang {
 namespace doc {
 
+
+static std::unordered_set<std::string> USRVisited;
+
+static llvm::sys::Mutex USRVisitedGuard;
+
 void MapASTVisitor::HandleTranslationUnit(ASTContext &Context) {
   TraverseDecl(Context.getTranslationUnitDecl());
 }
@@ -34,6 +39,17 @@ template <typename T> bool MapASTVisitor::mapDecl(const T *D) {
   // If there is an error generating a USR for the decl, skip this decl.
   if (index::generateUSRForDecl(D, USR))
     return true;
+
+  // Prevent Visiting USR twice
+  {
+    std::lock_guard<llvm::sys::Mutex> Guard(USRVisitedGuard);
+    std::string Visited = USR.str().str();
+    if (USRVisited.count(Visited)) {
+      return true;
+    }
+    USRVisited.insert(Visited);
+  }
+
   bool IsFileInRootDir;
   llvm::SmallString<128> File =
       getFile(D, D->getASTContext(), CDCtx.SourceRoot, IsFileInRootDir);
@@ -41,6 +57,11 @@ template <typename T> bool MapASTVisitor::mapDecl(const T *D) {
                                getLine(D, D->getASTContext()), File,
                                IsFileInRootDir, CDCtx.PublicOnly);
 
+  // Bitcode
+  if (CDCtx.NoBitcode) {
+    return true;
+  }
+
   // A null in place of I indicates that the serializer is skipping this decl
   // for some reason (e.g. we're only reporting public decls).
   if (I.first)
diff --git a/clang-tools-extra/clang-doc/Mapper.h b/clang-tools-extra/clang-doc/Mapper.h
index cedde935ab743..1da7a66f1471a 100644
--- a/clang-tools-extra/clang-doc/Mapper.h
+++ b/clang-tools-extra/clang-doc/Mapper.h
@@ -20,6 +20,8 @@
 #include "Representation.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/Tooling/Execution.h"
+#include "llvm/Support/Mutex.h"
+#include <unordered_set>
 
 using namespace clang::comments;
 using namespace clang::tooling;
@@ -53,6 +55,7 @@ class MapASTVisitor : public clang::RecursiveASTVisitor<MapASTVisitor>,
                                     const ASTContext &Context) const;
 
   ClangDocContext CDCtx;
+
 };
 
 } // namespace doc
diff --git a/clang-tools-extra/clang-doc/Representation.cpp b/clang-tools-extra/clang-doc/Representation.cpp
index 2afff2929cf79..1ac662b4dd3d2 100644
--- a/clang-tools-extra/clang-doc/Representation.cpp
+++ b/clang-tools-extra/clang-doc/Representation.cpp
@@ -366,13 +366,13 @@ void Index::sort() {
 
 ClangDocContext::ClangDocContext(tooling::ExecutionContext *ECtx,
                                  StringRef ProjectName, bool PublicOnly,
-                                 StringRef OutDirectory, StringRef SourceRoot,
-                                 StringRef RepositoryUrl,
+                                 bool NoBitcode, StringRef OutDirectory,
+                                 StringRef SourceRoot, StringRef RepositoryUrl,
                                  std::vector<std::string> UserStylesheets,
                                  std::vector<std::string> JsScripts)
     : ECtx(ECtx), ProjectName(ProjectName), PublicOnly(PublicOnly),
-      OutDirectory(OutDirectory), UserStylesheets(UserStylesheets),
-      JsScripts(JsScripts) {
+      NoBitcode(NoBitcode), OutDirectory(OutDirectory),
+      UserStylesheets(UserStylesheets), JsScripts(JsScripts) {
   llvm::SmallString<128> SourceRootDir(SourceRoot);
   if (SourceRoot.empty())
     // If no SourceRoot was provided the current path is used as the default
diff --git a/clang-tools-extra/clang-doc/Representation.h b/clang-tools-extra/clang-doc/Representation.h
index a6b144eb7fa2a..00f4f722f8b5a 100644
--- a/clang-tools-extra/clang-doc/Representation.h
+++ b/clang-tools-extra/clang-doc/Representation.h
@@ -480,12 +480,15 @@ mergeInfos(std::vector<std::unique_ptr<Info>> &Values);
 struct ClangDocContext {
   ClangDocContext() = default;
   ClangDocContext(tooling::ExecutionContext *ECtx, StringRef ProjectName,
-                  bool PublicOnly, StringRef OutDirectory, StringRef SourceRoot,
-                  StringRef RepositoryUrl,
+                  bool NoBitcode, bool PublicOnly, StringRef OutDirectory,
+                  StringRef SourceRoot, StringRef RepositoryUrl,
                   std::vector<std::string> UserStylesheets,
                   std::vector<std::string> JsScripts);
+
+  llvm::StringMap<std::vector<std::unique_ptr<Info>>> Infos;
   tooling::ExecutionContext *ECtx;
   std::string ProjectName; // Name of project clang-doc is documenting.
+  bool NoBitcode;  // Indicate if clang-doc will serialize Decl into Bitcode
   bool PublicOnly; // Indicates if only public declarations are documented.
   std::string OutDirectory; // Directory for outputting generated files.
   std::string SourceRoot;   // Directory where processed files are stored. Links
diff --git a/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp b/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp
index 5517522d7967d..fc111e090d72a 100644
--- a/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp
+++ b/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp
@@ -18,12 +18,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "BitcodeReader.h"
-#include "BitcodeWriter.h"
 #include "ClangDoc.h"
 #include "Generators.h"
 #include "Representation.h"
-#include "clang/AST/AST.h"
-#include "clang/AST/Decl.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchersInternal.h"
 #include "clang/Driver/Options.h"
@@ -31,14 +28,11 @@
 #include "clang/Tooling/AllTUsExecution.h"
 #include "clang/Tooling/CommonOptionsParser.h"
 #include "clang/Tooling/Execution.h"
-#include "clang/Tooling/Tooling.h"
-#include "llvm/ADT/APFloat.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Mutex.h"
 #include "llvm/Support/Path.h"
-#include "llvm/Support/Process.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/ThreadPool.h"
 #include "llvm/Support/raw_ostream.h"
@@ -67,6 +61,11 @@ static llvm::cl::opt<std::string>
                  llvm::cl::desc("Directory for outputting generated files."),
                  llvm::cl::init("docs"), llvm::cl::cat(ClangDocCategory));
 
+static llvm::cl::opt<bool> NoBitcode(
+     "nobitcode",
+     llvm::cl::desc("Do not emit bitcode for faster processing time"),
+     llvm::cl::init(false), llvm::cl::cat(ClangDocCategory));
+
 static llvm::cl::opt<bool>
     PublicOnly("public", llvm::cl::desc("Document only public declarations."),
                llvm::cl::init(false), llvm::cl::cat(ClangDocCategory));
@@ -175,6 +174,7 @@ Example usage for a project using a compile commands database:
       Executor->get()->getExecutionContext(),
       ProjectName,
       PublicOnly,
+      NoBitcode,
       OutDirectory,
       SourceRoot,
       RepositoryUrl,
@@ -234,6 +234,7 @@ Example usage for a project using a compile commands database:
 
   // First reducing phase (reduce all decls into one info per decl).
   llvm::outs() << "Reducing " << USRToBitcode.size() << " infos...\n";
+
   std::atomic<bool> Error;
   Error = false;
   llvm::sys::Mutex IndexMutex;

>From 59fa4d9d31961fd57fd5d28ef171b48fb7fb99d7 Mon Sep 17 00:00:00 2001
From: PeterChou1 <peter.chou at mail.utoronto.ca>
Date: Wed, 26 Jun 2024 14:36:09 -0400
Subject: [PATCH 2/2] [clang-doc] add short circuit to improve performance

---
 clang-tools-extra/clang-doc/Mapper.cpp            |  7 ++-----
 clang-tools-extra/clang-doc/Mapper.h              |  3 ---
 clang-tools-extra/clang-doc/Representation.cpp    |  8 ++++----
 clang-tools-extra/clang-doc/Representation.h      |  7 ++-----
 clang-tools-extra/clang-doc/tool/ClangDocMain.cpp | 13 ++++++-------
 5 files changed, 14 insertions(+), 24 deletions(-)

diff --git a/clang-tools-extra/clang-doc/Mapper.cpp b/clang-tools-extra/clang-doc/Mapper.cpp
index ba6311e85a2ee..b9ca6edd03b8c 100644
--- a/clang-tools-extra/clang-doc/Mapper.cpp
+++ b/clang-tools-extra/clang-doc/Mapper.cpp
@@ -12,7 +12,8 @@
 #include "clang/AST/Comment.h"
 #include "clang/Index/USRGeneration.h"
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/Error.h"
+#include "llvm/Support/Mutex.h"
+#include <unordered_set>
 
 namespace clang {
 namespace doc {
@@ -57,10 +58,6 @@ template <typename T> bool MapASTVisitor::mapDecl(const T *D) {
                                getLine(D, D->getASTContext()), File,
                                IsFileInRootDir, CDCtx.PublicOnly);
 
-  // Bitcode
-  if (CDCtx.NoBitcode) {
-    return true;
-  }
 
   // A null in place of I indicates that the serializer is skipping this decl
   // for some reason (e.g. we're only reporting public decls).
diff --git a/clang-tools-extra/clang-doc/Mapper.h b/clang-tools-extra/clang-doc/Mapper.h
index 1da7a66f1471a..cedde935ab743 100644
--- a/clang-tools-extra/clang-doc/Mapper.h
+++ b/clang-tools-extra/clang-doc/Mapper.h
@@ -20,8 +20,6 @@
 #include "Representation.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/Tooling/Execution.h"
-#include "llvm/Support/Mutex.h"
-#include <unordered_set>
 
 using namespace clang::comments;
 using namespace clang::tooling;
@@ -55,7 +53,6 @@ class MapASTVisitor : public clang::RecursiveASTVisitor<MapASTVisitor>,
                                     const ASTContext &Context) const;
 
   ClangDocContext CDCtx;
-
 };
 
 } // namespace doc
diff --git a/clang-tools-extra/clang-doc/Representation.cpp b/clang-tools-extra/clang-doc/Representation.cpp
index 1ac662b4dd3d2..2afff2929cf79 100644
--- a/clang-tools-extra/clang-doc/Representation.cpp
+++ b/clang-tools-extra/clang-doc/Representation.cpp
@@ -366,13 +366,13 @@ void Index::sort() {
 
 ClangDocContext::ClangDocContext(tooling::ExecutionContext *ECtx,
                                  StringRef ProjectName, bool PublicOnly,
-                                 bool NoBitcode, StringRef OutDirectory,
-                                 StringRef SourceRoot, StringRef RepositoryUrl,
+                                 StringRef OutDirectory, StringRef SourceRoot,
+                                 StringRef RepositoryUrl,
                                  std::vector<std::string> UserStylesheets,
                                  std::vector<std::string> JsScripts)
     : ECtx(ECtx), ProjectName(ProjectName), PublicOnly(PublicOnly),
-      NoBitcode(NoBitcode), OutDirectory(OutDirectory),
-      UserStylesheets(UserStylesheets), JsScripts(JsScripts) {
+      OutDirectory(OutDirectory), UserStylesheets(UserStylesheets),
+      JsScripts(JsScripts) {
   llvm::SmallString<128> SourceRootDir(SourceRoot);
   if (SourceRoot.empty())
     // If no SourceRoot was provided the current path is used as the default
diff --git a/clang-tools-extra/clang-doc/Representation.h b/clang-tools-extra/clang-doc/Representation.h
index 00f4f722f8b5a..a6b144eb7fa2a 100644
--- a/clang-tools-extra/clang-doc/Representation.h
+++ b/clang-tools-extra/clang-doc/Representation.h
@@ -480,15 +480,12 @@ mergeInfos(std::vector<std::unique_ptr<Info>> &Values);
 struct ClangDocContext {
   ClangDocContext() = default;
   ClangDocContext(tooling::ExecutionContext *ECtx, StringRef ProjectName,
-                  bool NoBitcode, bool PublicOnly, StringRef OutDirectory,
-                  StringRef SourceRoot, StringRef RepositoryUrl,
+                  bool PublicOnly, StringRef OutDirectory, StringRef SourceRoot,
+                  StringRef RepositoryUrl,
                   std::vector<std::string> UserStylesheets,
                   std::vector<std::string> JsScripts);
-
-  llvm::StringMap<std::vector<std::unique_ptr<Info>>> Infos;
   tooling::ExecutionContext *ECtx;
   std::string ProjectName; // Name of project clang-doc is documenting.
-  bool NoBitcode;  // Indicate if clang-doc will serialize Decl into Bitcode
   bool PublicOnly; // Indicates if only public declarations are documented.
   std::string OutDirectory; // Directory for outputting generated files.
   std::string SourceRoot;   // Directory where processed files are stored. Links
diff --git a/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp b/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp
index fc111e090d72a..5517522d7967d 100644
--- a/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp
+++ b/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp
@@ -18,9 +18,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "BitcodeReader.h"
+#include "BitcodeWriter.h"
 #include "ClangDoc.h"
 #include "Generators.h"
 #include "Representation.h"
+#include "clang/AST/AST.h"
+#include "clang/AST/Decl.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchersInternal.h"
 #include "clang/Driver/Options.h"
@@ -28,11 +31,14 @@
 #include "clang/Tooling/AllTUsExecution.h"
 #include "clang/Tooling/CommonOptionsParser.h"
 #include "clang/Tooling/Execution.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/ADT/APFloat.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Mutex.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/ThreadPool.h"
 #include "llvm/Support/raw_ostream.h"
@@ -61,11 +67,6 @@ static llvm::cl::opt<std::string>
                  llvm::cl::desc("Directory for outputting generated files."),
                  llvm::cl::init("docs"), llvm::cl::cat(ClangDocCategory));
 
-static llvm::cl::opt<bool> NoBitcode(
-     "nobitcode",
-     llvm::cl::desc("Do not emit bitcode for faster processing time"),
-     llvm::cl::init(false), llvm::cl::cat(ClangDocCategory));
-
 static llvm::cl::opt<bool>
     PublicOnly("public", llvm::cl::desc("Document only public declarations."),
                llvm::cl::init(false), llvm::cl::cat(ClangDocCategory));
@@ -174,7 +175,6 @@ Example usage for a project using a compile commands database:
       Executor->get()->getExecutionContext(),
       ProjectName,
       PublicOnly,
-      NoBitcode,
       OutDirectory,
       SourceRoot,
       RepositoryUrl,
@@ -234,7 +234,6 @@ Example usage for a project using a compile commands database:
 
   // First reducing phase (reduce all decls into one info per decl).
   llvm::outs() << "Reducing " << USRToBitcode.size() << " infos...\n";
-
   std::atomic<bool> Error;
   Error = false;
   llvm::sys::Mutex IndexMutex;



More information about the cfe-commits mailing list