[clang-tools-extra] [clang-doc] Improve performance by adding a short circuit (PR #96809)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Jun 26 11:38:38 PDT 2024
https://github.com/PeterChou1 created https://github.com/llvm/llvm-project/pull/96809
This patch adds a short circuit that addresses the performance problem referenced in
issue: https://github.com/llvm/llvm-project/issues/96570
From my local testing, it cut the runtime of generating LLVM docs from 10 hours to 30 minutes.
From 7733243a7b9660ce2bcbd43f6219314fcd1e4a73 Mon Sep 17 00:00:00 2001
From: PeterChou1 <peter.chou at mail.utoronto.ca>
Date: Wed, 26 Jun 2024 14:20:03 -0400
Subject: [PATCH 1/2] [clang-doc] add short circuit in mapper
---
clang-tools-extra/clang-doc/Mapper.cpp | 21 +++++++++++++++++++
clang-tools-extra/clang-doc/Mapper.h | 3 +++
.../clang-doc/Representation.cpp | 8 +++----
clang-tools-extra/clang-doc/Representation.h | 7 +++++--
.../clang-doc/tool/ClangDocMain.cpp | 13 ++++++------
5 files changed, 40 insertions(+), 12 deletions(-)
diff --git a/clang-tools-extra/clang-doc/Mapper.cpp b/clang-tools-extra/clang-doc/Mapper.cpp
index bb8b7952980ac..ba6311e85a2ee 100644
--- a/clang-tools-extra/clang-doc/Mapper.cpp
+++ b/clang-tools-extra/clang-doc/Mapper.cpp
@@ -17,6 +17,11 @@
namespace clang {
namespace doc {
+
+static std::unordered_set<std::string> USRVisited;
+
+static llvm::sys::Mutex USRVisitedGuard;
+
void MapASTVisitor::HandleTranslationUnit(ASTContext &Context) {
TraverseDecl(Context.getTranslationUnitDecl());
}
@@ -34,6 +39,17 @@ template <typename T> bool MapASTVisitor::mapDecl(const T *D) {
// If there is an error generating a USR for the decl, skip this decl.
if (index::generateUSRForDecl(D, USR))
return true;
+
+ // Prevent Visiting USR twice
+ {
+ std::lock_guard<llvm::sys::Mutex> Guard(USRVisitedGuard);
+ std::string Visited = USR.str().str();
+ if (USRVisited.count(Visited)) {
+ return true;
+ }
+ USRVisited.insert(Visited);
+ }
+
bool IsFileInRootDir;
llvm::SmallString<128> File =
getFile(D, D->getASTContext(), CDCtx.SourceRoot, IsFileInRootDir);
@@ -41,6 +57,11 @@ template <typename T> bool MapASTVisitor::mapDecl(const T *D) {
getLine(D, D->getASTContext()), File,
IsFileInRootDir, CDCtx.PublicOnly);
+ // Bitcode
+ if (CDCtx.NoBitcode) {
+ return true;
+ }
+
// A null in place of I indicates that the serializer is skipping this decl
// for some reason (e.g. we're only reporting public decls).
if (I.first)
diff --git a/clang-tools-extra/clang-doc/Mapper.h b/clang-tools-extra/clang-doc/Mapper.h
index cedde935ab743..1da7a66f1471a 100644
--- a/clang-tools-extra/clang-doc/Mapper.h
+++ b/clang-tools-extra/clang-doc/Mapper.h
@@ -20,6 +20,8 @@
#include "Representation.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/Tooling/Execution.h"
+#include "llvm/Support/Mutex.h"
+#include <unordered_set>
using namespace clang::comments;
using namespace clang::tooling;
@@ -53,6 +55,7 @@ class MapASTVisitor : public clang::RecursiveASTVisitor<MapASTVisitor>,
const ASTContext &Context) const;
ClangDocContext CDCtx;
+
};
} // namespace doc
diff --git a/clang-tools-extra/clang-doc/Representation.cpp b/clang-tools-extra/clang-doc/Representation.cpp
index 2afff2929cf79..1ac662b4dd3d2 100644
--- a/clang-tools-extra/clang-doc/Representation.cpp
+++ b/clang-tools-extra/clang-doc/Representation.cpp
@@ -366,13 +366,13 @@ void Index::sort() {
ClangDocContext::ClangDocContext(tooling::ExecutionContext *ECtx,
StringRef ProjectName, bool PublicOnly,
- StringRef OutDirectory, StringRef SourceRoot,
- StringRef RepositoryUrl,
+ bool NoBitcode, StringRef OutDirectory,
+ StringRef SourceRoot, StringRef RepositoryUrl,
std::vector<std::string> UserStylesheets,
std::vector<std::string> JsScripts)
: ECtx(ECtx), ProjectName(ProjectName), PublicOnly(PublicOnly),
- OutDirectory(OutDirectory), UserStylesheets(UserStylesheets),
- JsScripts(JsScripts) {
+ NoBitcode(NoBitcode), OutDirectory(OutDirectory),
+ UserStylesheets(UserStylesheets), JsScripts(JsScripts) {
llvm::SmallString<128> SourceRootDir(SourceRoot);
if (SourceRoot.empty())
// If no SourceRoot was provided the current path is used as the default
diff --git a/clang-tools-extra/clang-doc/Representation.h b/clang-tools-extra/clang-doc/Representation.h
index a6b144eb7fa2a..00f4f722f8b5a 100644
--- a/clang-tools-extra/clang-doc/Representation.h
+++ b/clang-tools-extra/clang-doc/Representation.h
@@ -480,12 +480,15 @@ mergeInfos(std::vector<std::unique_ptr<Info>> &Values);
struct ClangDocContext {
ClangDocContext() = default;
ClangDocContext(tooling::ExecutionContext *ECtx, StringRef ProjectName,
- bool PublicOnly, StringRef OutDirectory, StringRef SourceRoot,
- StringRef RepositoryUrl,
+ bool NoBitcode, bool PublicOnly, StringRef OutDirectory,
+ StringRef SourceRoot, StringRef RepositoryUrl,
std::vector<std::string> UserStylesheets,
std::vector<std::string> JsScripts);
+
+ llvm::StringMap<std::vector<std::unique_ptr<Info>>> Infos;
tooling::ExecutionContext *ECtx;
std::string ProjectName; // Name of project clang-doc is documenting.
+ bool NoBitcode; // Indicate if clang-doc will serialize Decl into Bitcode
bool PublicOnly; // Indicates if only public declarations are documented.
std::string OutDirectory; // Directory for outputting generated files.
std::string SourceRoot; // Directory where processed files are stored. Links
diff --git a/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp b/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp
index 5517522d7967d..fc111e090d72a 100644
--- a/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp
+++ b/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp
@@ -18,12 +18,9 @@
//===----------------------------------------------------------------------===//
#include "BitcodeReader.h"
-#include "BitcodeWriter.h"
#include "ClangDoc.h"
#include "Generators.h"
#include "Representation.h"
-#include "clang/AST/AST.h"
-#include "clang/AST/Decl.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchersInternal.h"
#include "clang/Driver/Options.h"
@@ -31,14 +28,11 @@
#include "clang/Tooling/AllTUsExecution.h"
#include "clang/Tooling/CommonOptionsParser.h"
#include "clang/Tooling/Execution.h"
-#include "clang/Tooling/Tooling.h"
-#include "llvm/ADT/APFloat.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/Process.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/raw_ostream.h"
@@ -67,6 +61,11 @@ static llvm::cl::opt<std::string>
llvm::cl::desc("Directory for outputting generated files."),
llvm::cl::init("docs"), llvm::cl::cat(ClangDocCategory));
+static llvm::cl::opt<bool> NoBitcode(
+ "nobitcode",
+ llvm::cl::desc("Do not emit bitcode for faster processing time"),
+ llvm::cl::init(false), llvm::cl::cat(ClangDocCategory));
+
static llvm::cl::opt<bool>
PublicOnly("public", llvm::cl::desc("Document only public declarations."),
llvm::cl::init(false), llvm::cl::cat(ClangDocCategory));
@@ -175,6 +174,7 @@ Example usage for a project using a compile commands database:
Executor->get()->getExecutionContext(),
ProjectName,
PublicOnly,
+ NoBitcode,
OutDirectory,
SourceRoot,
RepositoryUrl,
@@ -234,6 +234,7 @@ Example usage for a project using a compile commands database:
// First reducing phase (reduce all decls into one info per decl).
llvm::outs() << "Reducing " << USRToBitcode.size() << " infos...\n";
+
std::atomic<bool> Error;
Error = false;
llvm::sys::Mutex IndexMutex;
From 59fa4d9d31961fd57fd5d28ef171b48fb7fb99d7 Mon Sep 17 00:00:00 2001
From: PeterChou1 <peter.chou at mail.utoronto.ca>
Date: Wed, 26 Jun 2024 14:36:09 -0400
Subject: [PATCH 2/2] [clang-doc] add short circuit to improve performance
---
clang-tools-extra/clang-doc/Mapper.cpp | 7 ++-----
clang-tools-extra/clang-doc/Mapper.h | 3 ---
clang-tools-extra/clang-doc/Representation.cpp | 8 ++++----
clang-tools-extra/clang-doc/Representation.h | 7 ++-----
clang-tools-extra/clang-doc/tool/ClangDocMain.cpp | 13 ++++++-------
5 files changed, 14 insertions(+), 24 deletions(-)
diff --git a/clang-tools-extra/clang-doc/Mapper.cpp b/clang-tools-extra/clang-doc/Mapper.cpp
index ba6311e85a2ee..b9ca6edd03b8c 100644
--- a/clang-tools-extra/clang-doc/Mapper.cpp
+++ b/clang-tools-extra/clang-doc/Mapper.cpp
@@ -12,7 +12,8 @@
#include "clang/AST/Comment.h"
#include "clang/Index/USRGeneration.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/Error.h"
+#include "llvm/Support/Mutex.h"
+#include <unordered_set>
namespace clang {
namespace doc {
@@ -57,10 +58,6 @@ template <typename T> bool MapASTVisitor::mapDecl(const T *D) {
getLine(D, D->getASTContext()), File,
IsFileInRootDir, CDCtx.PublicOnly);
- // Bitcode
- if (CDCtx.NoBitcode) {
- return true;
- }
// A null in place of I indicates that the serializer is skipping this decl
// for some reason (e.g. we're only reporting public decls).
diff --git a/clang-tools-extra/clang-doc/Mapper.h b/clang-tools-extra/clang-doc/Mapper.h
index 1da7a66f1471a..cedde935ab743 100644
--- a/clang-tools-extra/clang-doc/Mapper.h
+++ b/clang-tools-extra/clang-doc/Mapper.h
@@ -20,8 +20,6 @@
#include "Representation.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/Tooling/Execution.h"
-#include "llvm/Support/Mutex.h"
-#include <unordered_set>
using namespace clang::comments;
using namespace clang::tooling;
@@ -55,7 +53,6 @@ class MapASTVisitor : public clang::RecursiveASTVisitor<MapASTVisitor>,
const ASTContext &Context) const;
ClangDocContext CDCtx;
-
};
} // namespace doc
diff --git a/clang-tools-extra/clang-doc/Representation.cpp b/clang-tools-extra/clang-doc/Representation.cpp
index 1ac662b4dd3d2..2afff2929cf79 100644
--- a/clang-tools-extra/clang-doc/Representation.cpp
+++ b/clang-tools-extra/clang-doc/Representation.cpp
@@ -366,13 +366,13 @@ void Index::sort() {
ClangDocContext::ClangDocContext(tooling::ExecutionContext *ECtx,
StringRef ProjectName, bool PublicOnly,
- bool NoBitcode, StringRef OutDirectory,
- StringRef SourceRoot, StringRef RepositoryUrl,
+ StringRef OutDirectory, StringRef SourceRoot,
+ StringRef RepositoryUrl,
std::vector<std::string> UserStylesheets,
std::vector<std::string> JsScripts)
: ECtx(ECtx), ProjectName(ProjectName), PublicOnly(PublicOnly),
- NoBitcode(NoBitcode), OutDirectory(OutDirectory),
- UserStylesheets(UserStylesheets), JsScripts(JsScripts) {
+ OutDirectory(OutDirectory), UserStylesheets(UserStylesheets),
+ JsScripts(JsScripts) {
llvm::SmallString<128> SourceRootDir(SourceRoot);
if (SourceRoot.empty())
// If no SourceRoot was provided the current path is used as the default
diff --git a/clang-tools-extra/clang-doc/Representation.h b/clang-tools-extra/clang-doc/Representation.h
index 00f4f722f8b5a..a6b144eb7fa2a 100644
--- a/clang-tools-extra/clang-doc/Representation.h
+++ b/clang-tools-extra/clang-doc/Representation.h
@@ -480,15 +480,12 @@ mergeInfos(std::vector<std::unique_ptr<Info>> &Values);
struct ClangDocContext {
ClangDocContext() = default;
ClangDocContext(tooling::ExecutionContext *ECtx, StringRef ProjectName,
- bool NoBitcode, bool PublicOnly, StringRef OutDirectory,
- StringRef SourceRoot, StringRef RepositoryUrl,
+ bool PublicOnly, StringRef OutDirectory, StringRef SourceRoot,
+ StringRef RepositoryUrl,
std::vector<std::string> UserStylesheets,
std::vector<std::string> JsScripts);
-
- llvm::StringMap<std::vector<std::unique_ptr<Info>>> Infos;
tooling::ExecutionContext *ECtx;
std::string ProjectName; // Name of project clang-doc is documenting.
- bool NoBitcode; // Indicate if clang-doc will serialize Decl into Bitcode
bool PublicOnly; // Indicates if only public declarations are documented.
std::string OutDirectory; // Directory for outputting generated files.
std::string SourceRoot; // Directory where processed files are stored. Links
diff --git a/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp b/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp
index fc111e090d72a..5517522d7967d 100644
--- a/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp
+++ b/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp
@@ -18,9 +18,12 @@
//===----------------------------------------------------------------------===//
#include "BitcodeReader.h"
+#include "BitcodeWriter.h"
#include "ClangDoc.h"
#include "Generators.h"
#include "Representation.h"
+#include "clang/AST/AST.h"
+#include "clang/AST/Decl.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchersInternal.h"
#include "clang/Driver/Options.h"
@@ -28,11 +31,14 @@
#include "clang/Tooling/AllTUsExecution.h"
#include "clang/Tooling/CommonOptionsParser.h"
#include "clang/Tooling/Execution.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/raw_ostream.h"
@@ -61,11 +67,6 @@ static llvm::cl::opt<std::string>
llvm::cl::desc("Directory for outputting generated files."),
llvm::cl::init("docs"), llvm::cl::cat(ClangDocCategory));
-static llvm::cl::opt<bool> NoBitcode(
- "nobitcode",
- llvm::cl::desc("Do not emit bitcode for faster processing time"),
- llvm::cl::init(false), llvm::cl::cat(ClangDocCategory));
-
static llvm::cl::opt<bool>
PublicOnly("public", llvm::cl::desc("Document only public declarations."),
llvm::cl::init(false), llvm::cl::cat(ClangDocCategory));
@@ -174,7 +175,6 @@ Example usage for a project using a compile commands database:
Executor->get()->getExecutionContext(),
ProjectName,
PublicOnly,
- NoBitcode,
OutDirectory,
SourceRoot,
RepositoryUrl,
@@ -234,7 +234,6 @@ Example usage for a project using a compile commands database:
// First reducing phase (reduce all decls into one info per decl).
llvm::outs() << "Reducing " << USRToBitcode.size() << " infos...\n";
-
std::atomic<bool> Error;
Error = false;
llvm::sys::Mutex IndexMutex;
More information about the cfe-commits
mailing list