[clang] [WIP] Implement `-dump-deserialized-declaration-ranges` flag. (PR #133910)
Viktoriia Bakalova via cfe-commits
cfe-commits at lists.llvm.org
Fri Apr 4 04:50:51 PDT 2025
https://github.com/VitaNuo updated https://github.com/llvm/llvm-project/pull/133910
>From 6d61256a73c64ef4351266d6ef76067f5a2568db Mon Sep 17 00:00:00 2001
From: Viktoriia Bakalova <bakalova at google.com>
Date: Fri, 4 Apr 2025 13:40:55 +0200
Subject: [PATCH 1/2] [WIP] Implement -dump-deserialized-declaration-ranges
flag to dump source ranges of deserialized decls.
---
clang/include/clang/Driver/Options.td | 4 +
.../include/clang/Frontend/FrontendOptions.h | 3 +
clang/lib/Frontend/FrontendAction.cpp | 161 +++++++++++++++++-
.../dump-deserialized-declaration-ranges.cpp | 80 +++++++++
4 files changed, 243 insertions(+), 5 deletions(-)
create mode 100644 clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 3af072242d039..1737e40b776e1 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -7968,6 +7968,10 @@ def print_dependency_directives_minimized_source : Flag<["-"],
"print-dependency-directives-minimized-source">,
HelpText<"Print the output of the dependency directives source minimizer">;
}
+def dump_deserialized_declaration_ranges : Joined<["-"],
+ "dump-deserialized-declaration-ranges=">,
+ HelpText<"Dump ranges of deserialized declarations to aid debugging and minimization">,
+ MarshallingInfoString<FrontendOpts<"DumpDeserializedDeclarationRangesPath">>;
defm emit_llvm_uselists : BoolOption<"", "emit-llvm-uselists",
CodeGenOpts<"EmitLLVMUseLists">, DefaultFalse,
diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h
index a9c9849ff52ab..8ef9ce9db8783 100644
--- a/clang/include/clang/Frontend/FrontendOptions.h
+++ b/clang/include/clang/Frontend/FrontendOptions.h
@@ -530,6 +530,9 @@ class FrontendOptions {
/// Output Path for module output file.
std::string ModuleOutputPath;
+ /// Output path to dump ranges of deserialized declarations.
+ std::string DumpDeserializedDeclarationRangesPath;
+
public:
FrontendOptions()
: DisableFree(false), RelocatablePCH(false), ShowHelp(false),
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 2d77f06be7446..f98aa5ab1fe51 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -15,6 +15,7 @@
#include "clang/Basic/FileEntry.h"
#include "clang/Basic/LangStandard.h"
#include "clang/Basic/Sarif.h"
+#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Stack.h"
#include "clang/Frontend/ASTUnit.h"
#include "clang/Frontend/CompilerInstance.h"
@@ -35,6 +36,7 @@
#include "clang/Serialization/ASTReader.h"
#include "clang/Serialization/GlobalModuleIndex.h"
#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/BuryPointer.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
@@ -49,6 +51,144 @@ LLVM_INSTANTIATE_REGISTRY(FrontendPluginRegistry)
namespace {
+/// DeserializedDeclsLineRangePrinter dumps ranges of deserialized declarations to aid debugging and bug minimization.
+/// It implements ASTConsumer and ASTDeserializationListener, so that an object of DeserializedDeclsLineRangePrinter registers
+/// as its own listener.
+/// The ASTDeserializationListener interface provides the DeclRead callback that we use to collect the deserialized Decls.
+/// Note that printing or otherwise processing them as this point is dangerous, since that could trigger additional
+/// deserialization and crash compilation.
+/// Therefore, we process the collected Decls in HandleTranslationUnit method of ASTConsumer.
+/// This is a safe point, since we know that by this point all the Decls needed by the compiler frontend have been
+/// deserialized. In case our processing causes further deserialization, DeclRead from the listener might be called again.
+/// However, at that point we don't accept any more Decls for processing.
+class DeserializedDeclsLineRangePrinter : public ASTDeserializationListener, public ASTConsumer {
+public:
+ explicit DeserializedDeclsLineRangePrinter(SourceManager &SM, std::unique_ptr<llvm::raw_fd_ostream> OS)
+ : ASTDeserializationListener(), SM(SM), OS(std::move(OS)) {}
+
+ void DeclRead(GlobalDeclID ID, const Decl *D) override {
+ if (!IsCollectingDecls) {
+ return;
+ }
+ if (!D || isa<TranslationUnitDecl>(D) || isa<LinkageSpecDecl>(D) ||
+ isa<NamespaceDecl>(D))
+ return;
+ if (auto *DC = D->getDeclContext(); !DC || !DC->isFileContext())
+ return;
+ PendingDecls.push_back(D);
+ ASTDeserializationListener::DeclRead(ID, D);
+ }
+
+ using Position = std::pair<unsigned, unsigned>;
+ struct RequiredRanges {
+ StringRef Filename;
+ std::vector<std::pair<Position, Position>> FromTo;
+ };
+ void HandleTranslationUnit(ASTContext &Context) override {
+ IsCollectingDecls = false;
+ std::vector<const Decl *> Decls = std::move(PendingDecls);
+ if (!PendingDecls.empty()) {
+ llvm::errs() << "Deserialized more decls while printing, total of "
+ << PendingDecls.size() << "\n";
+ PendingDecls.clear();
+ }
+
+ // Merge ranges in each of the files. For simplicity, track lines and hope
+ // they do not break things.
+ struct FileData {
+ std::vector<std::pair<Position, Position>> FromTo;
+ std::vector<std::pair<unsigned, unsigned>> Columns;
+ OptionalFileEntryRef Ref;
+ };
+ llvm::DenseMap<const FileEntry *, FileData> FileToLines;
+ for (const Decl *D : Decls) {
+ CharSourceRange R = SM.getExpansionRange(D->getSourceRange());
+ if (!R.isValid())
+ continue;
+
+ auto *F = SM.getFileEntryForID(SM.getFileID(R.getBegin()));
+ if (F != SM.getFileEntryForID(SM.getFileID(R.getEnd())))
+ continue;
+
+ auto &Data = FileToLines[F];
+ if (!Data.Ref)
+ Data.Ref =
+ SM.getFileEntryRefForID(SM.getFileID(R.getBegin()));
+ Data.FromTo.push_back({{SM.getSpellingLineNumber(R.getBegin()), SM.getSpellingColumnNumber(R.getBegin())},
+ {SM.getSpellingLineNumber(R.getEnd()), SM.getSpellingColumnNumber(R.getEnd())}});
+ }
+
+ std::vector<RequiredRanges> Result;
+ for (auto &[F, Data] : FileToLines) {
+ auto& FromTo = Data.FromTo;
+ assert(!FromTo.empty());
+
+ if (!Data.Ref) continue;
+
+ llvm::sort(FromTo);
+
+ std::vector<std::pair<Position, Position>> MergedLines;
+ MergedLines.push_back(FromTo.front());
+ for (auto It = FromTo.begin() + 1; It < FromTo.end(); ++It) {
+ if (MergedLines.back().second < It->first) {
+ MergedLines.push_back(*It);
+ continue;
+ }
+ if (MergedLines.back().second < It->second)
+ MergedLines.back().second = It->second;
+ }
+ Result.push_back({Data.Ref->getName(), MergedLines});
+ }
+ printJson(Result);
+ }
+
+ void printJson(const std::vector<RequiredRanges>& Result) {
+ *OS << "{\n";
+ *OS << " \"required_ranges\": [\n";
+ for (size_t i = 0; i < Result.size(); ++i) {
+ auto &F = Result[i].Filename;
+ auto &MergedLines = Result[i].FromTo;
+ *OS << " {\n";
+ *OS << " \"file\": \"" << F << "\",\n";
+ *OS << " \"range\": [\n";
+ for (size_t j = 0; j < MergedLines.size(); ++j) {
+ auto &From = MergedLines[j].first;
+ auto &To = MergedLines[j].second;
+ *OS << " {\n";
+ *OS << " \"from\": {\n";
+ *OS << " \"line\": " << From.first << ",\n";
+ *OS << " \"column\": " << From.second << "\n },\n";
+ *OS << " \"to\": {\n";
+ *OS << " \"line\": " << To.first << ",\n";
+ *OS << " \"column\": " << To.second << "\n }\n";
+ *OS << " }";
+ if (j < MergedLines.size() - 1) {
+ *OS << ",";
+ }
+ *OS << "\n";
+ }
+ *OS << " ]\n }";
+ if (i < Result.size() - 1) {
+ *OS << ",";
+ }
+ *OS << "\n";
+ }
+ *OS << " ]\n";
+ *OS << "}\n";
+ }
+
+ ASTDeserializationListener *GetASTDeserializationListener() override {
+ return this;
+ }
+
+private:
+std::vector<const Decl *> PendingDecls;
+bool IsCollectingDecls = true;
+const SourceManager &SM;
+std::unique_ptr<llvm::raw_ostream> OS;
+};
+
+
/// Dumps deserialized declarations.
class DeserializedDeclsDumper : public DelegatingDeserializationListener {
public:
@@ -121,6 +261,19 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI,
if (!Consumer)
return nullptr;
+ std::vector<std::unique_ptr<ASTConsumer>> Consumers;
+ llvm::StringRef DumpDeserializedDeclarationRangesPath = CI.getFrontendOpts().DumpDeserializedDeclarationRangesPath;
+ if (!DumpDeserializedDeclarationRangesPath.empty()) {
+ std::error_code ErrorCode;
+ auto FileStream = std::make_unique<llvm::raw_fd_ostream>(DumpDeserializedDeclarationRangesPath, ErrorCode, llvm::sys::fs::OF_None);
+ if (!ErrorCode) {
+ auto Printer = std::make_unique<DeserializedDeclsLineRangePrinter>(CI.getSourceManager(), std::move(FileStream));
+ Consumers.push_back(std::move(Printer));
+ } else {
+ llvm::errs() << "Failed to create output file for -dump-deserialized-declaration-ranges flag, file path: " << DumpDeserializedDeclarationRangesPath << ", error: " << ErrorCode.message() << "\n";
+ }
+ }
+
// Validate -add-plugin args.
bool FoundAllPlugins = true;
for (const std::string &Arg : CI.getFrontendOpts().AddPluginActions) {
@@ -138,17 +291,12 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI,
if (!FoundAllPlugins)
return nullptr;
- // If there are no registered plugins we don't need to wrap the consumer
- if (FrontendPluginRegistry::begin() == FrontendPluginRegistry::end())
- return Consumer;
-
// If this is a code completion run, avoid invoking the plugin consumers
if (CI.hasCodeCompletionConsumer())
return Consumer;
// Collect the list of plugins that go before the main action (in Consumers)
// or after it (in AfterConsumers)
- std::vector<std::unique_ptr<ASTConsumer>> Consumers;
std::vector<std::unique_ptr<ASTConsumer>> AfterConsumers;
for (const FrontendPluginRegistry::entry &Plugin :
FrontendPluginRegistry::entries()) {
@@ -191,6 +339,9 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI,
Consumers.push_back(std::move(C));
}
+ assert(Consumers.size() >= 1 && "should have added the main consumer");
+ if (Consumers.size() == 1)
+ return std::move(Consumers.front());
return std::make_unique<MultiplexConsumer>(std::move(Consumers));
}
diff --git a/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
new file mode 100644
index 0000000000000..bb43cb7c40e77
--- /dev/null
+++ b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
@@ -0,0 +1,80 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+// RUN: %clang_cc1 -xc++ -fmodules -fmodule-name=foo -fmodule-map-file=%t/foo.cppmap -emit-module %t/foo.cppmap -o %t/foo.pcm
+// RUN: %clang_cc1 -xc++ -fmodules -dump-deserialized-declaration-ranges=%t/decls -fmodule-file=%t/foo.pcm %t/foo.cpp -o %t/foo.o
+// RUN: cat %t/decls
+// RUN: echo '{ \
+// RUN: "required_ranges": [\
+// RUN: {\
+// RUN: "file": "/usr/local/google/home/bakalova/llvm-project/build/tools/clang/test/Frontend/Output/dump-deserialized-declaration-ranges.cpp.tmp/foo.h",\
+// RUN: "range": [\
+// RUN: {\
+// RUN: "from": {\
+// RUN: "line": 1,\
+// RUN: "column": 1\
+// RUN: },\
+// RUN: "to": {\
+// RUN: "line": 9,\
+// RUN: "column": 1\
+// RUN: }\
+// RUN: },\
+// RUN: {\
+// RUN: "from": {\
+// RUN: "line": 11,\
+// RUN: "column": 1\
+// RUN: },\
+// RUN: "to": {\
+// RUN: "line": 11,\
+// RUN: "column": 12\
+// RUN: }\
+// RUN: },\
+// RUN: {\
+// RUN: "from": {\
+// RUN: "line": 13,\
+// RUN: "column": 1\
+// RUN: },\
+// RUN: "to": {\
+// RUN: "line": 15,\
+// RUN: "column": 1\
+// RUN: }\
+// RUN: }\
+// RUN: ]\
+// RUN: }\
+// RUN: ]\
+// RUN:}' > %t/expected_decls
+// RUN: jq '.' %t/expected_decls > %t/expected_decls_formatted
+// RUN: diff %t/decls %t/expected_decls_formatted
+
+//--- foo.cppmap
+module foo {
+ header "foo.h"
+ export *
+}
+
+//--- foo.h
+class MyData {
+public:
+ MyData(int val): value_(val) {}
+ int getValue() const {
+ return 5;
+ }
+private:
+ int value_;
+};
+
+extern int global_value;
+
+int multiply(int a, int b) {
+ return a * b;
+}
+
+//--- foo.cpp
+#include "foo.h"
+int global_value = 5;
+int main() {
+ MyData data(5);
+ int current_value = data.getValue();
+ int doubled_value = multiply(current_value, 2);
+ int final_result = doubled_value + global_value;
+}
>From e1bec531d1c37b78a4bdc1058c051e2c5956d534 Mon Sep 17 00:00:00 2001
From: Viktoriia Bakalova <bakalova at google.com>
Date: Fri, 4 Apr 2025 13:50:36 +0200
Subject: [PATCH 2/2] Format.
---
clang/lib/Frontend/FrontendAction.cpp | 77 ++++++++++++++++-----------
1 file changed, 45 insertions(+), 32 deletions(-)
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index f98aa5ab1fe51..50a12ac854454 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -51,19 +51,24 @@ LLVM_INSTANTIATE_REGISTRY(FrontendPluginRegistry)
namespace {
-/// DeserializedDeclsLineRangePrinter dumps ranges of deserialized declarations to aid debugging and bug minimization.
-/// It implements ASTConsumer and ASTDeserializationListener, so that an object of DeserializedDeclsLineRangePrinter registers
-/// as its own listener.
-/// The ASTDeserializationListener interface provides the DeclRead callback that we use to collect the deserialized Decls.
-/// Note that printing or otherwise processing them as this point is dangerous, since that could trigger additional
-/// deserialization and crash compilation.
-/// Therefore, we process the collected Decls in HandleTranslationUnit method of ASTConsumer.
-/// This is a safe point, since we know that by this point all the Decls needed by the compiler frontend have been
-/// deserialized. In case our processing causes further deserialization, DeclRead from the listener might be called again.
+/// DeserializedDeclsLineRangePrinter dumps ranges of deserialized declarations
+/// to aid debugging and bug minimization. It implements ASTConsumer and
+/// ASTDeserializationListener, so that an object of
+/// DeserializedDeclsLineRangePrinter registers as its own listener. The
+/// ASTDeserializationListener interface provides the DeclRead callback that we
+/// use to collect the deserialized Decls. Note that printing or otherwise
+/// processing them at this point is dangerous, since that could trigger
+/// additional deserialization and crash compilation. Therefore, we process the
+/// collected Decls in HandleTranslationUnit method of ASTConsumer. This is a
+/// safe point, since we know that by this point all the Decls needed by the
+/// compiler frontend have been deserialized. In case our processing causes
+/// further deserialization, DeclRead from the listener might be called again.
/// However, at that point we don't accept any more Decls for processing.
-class DeserializedDeclsLineRangePrinter : public ASTDeserializationListener, public ASTConsumer {
+class DeserializedDeclsLineRangePrinter : public ASTDeserializationListener,
+ public ASTConsumer {
public:
- explicit DeserializedDeclsLineRangePrinter(SourceManager &SM, std::unique_ptr<llvm::raw_fd_ostream> OS)
+ explicit DeserializedDeclsLineRangePrinter(
+ SourceManager &SM, std::unique_ptr<llvm::raw_fd_ostream> OS)
: ASTDeserializationListener(), SM(SM), OS(std::move(OS)) {}
void DeclRead(GlobalDeclID ID, const Decl *D) override {
@@ -71,7 +76,7 @@ class DeserializedDeclsLineRangePrinter : public ASTDeserializationListener, pub
return;
}
if (!D || isa<TranslationUnitDecl>(D) || isa<LinkageSpecDecl>(D) ||
- isa<NamespaceDecl>(D))
+ isa<NamespaceDecl>(D))
return;
if (auto *DC = D->getDeclContext(); !DC || !DC->isFileContext())
return;
@@ -81,15 +86,15 @@ class DeserializedDeclsLineRangePrinter : public ASTDeserializationListener, pub
using Position = std::pair<unsigned, unsigned>;
struct RequiredRanges {
- StringRef Filename;
- std::vector<std::pair<Position, Position>> FromTo;
+ StringRef Filename;
+ std::vector<std::pair<Position, Position>> FromTo;
};
void HandleTranslationUnit(ASTContext &Context) override {
IsCollectingDecls = false;
std::vector<const Decl *> Decls = std::move(PendingDecls);
if (!PendingDecls.empty()) {
llvm::errs() << "Deserialized more decls while printing, total of "
- << PendingDecls.size() << "\n";
+ << PendingDecls.size() << "\n";
PendingDecls.clear();
}
@@ -112,18 +117,20 @@ class DeserializedDeclsLineRangePrinter : public ASTDeserializationListener, pub
auto &Data = FileToLines[F];
if (!Data.Ref)
- Data.Ref =
- SM.getFileEntryRefForID(SM.getFileID(R.getBegin()));
- Data.FromTo.push_back({{SM.getSpellingLineNumber(R.getBegin()), SM.getSpellingColumnNumber(R.getBegin())},
- {SM.getSpellingLineNumber(R.getEnd()), SM.getSpellingColumnNumber(R.getEnd())}});
+ Data.Ref = SM.getFileEntryRefForID(SM.getFileID(R.getBegin()));
+ Data.FromTo.push_back({{SM.getSpellingLineNumber(R.getBegin()),
+ SM.getSpellingColumnNumber(R.getBegin())},
+ {SM.getSpellingLineNumber(R.getEnd()),
+ SM.getSpellingColumnNumber(R.getEnd())}});
}
std::vector<RequiredRanges> Result;
for (auto &[F, Data] : FileToLines) {
- auto& FromTo = Data.FromTo;
+ auto &FromTo = Data.FromTo;
assert(!FromTo.empty());
- if (!Data.Ref) continue;
+ if (!Data.Ref)
+ continue;
llvm::sort(FromTo);
@@ -142,7 +149,7 @@ class DeserializedDeclsLineRangePrinter : public ASTDeserializationListener, pub
printJson(Result);
}
- void printJson(const std::vector<RequiredRanges>& Result) {
+ void printJson(const std::vector<RequiredRanges> &Result) {
*OS << "{\n";
*OS << " \"required_ranges\": [\n";
for (size_t i = 0; i < Result.size(); ++i) {
@@ -182,13 +189,12 @@ class DeserializedDeclsLineRangePrinter : public ASTDeserializationListener, pub
}
private:
-std::vector<const Decl *> PendingDecls;
-bool IsCollectingDecls = true;
-const SourceManager &SM;
-std::unique_ptr<llvm::raw_ostream> OS;
+ std::vector<const Decl *> PendingDecls;
+ bool IsCollectingDecls = true;
+ const SourceManager &SM;
+ std::unique_ptr<llvm::raw_ostream> OS;
};
-
/// Dumps deserialized declarations.
class DeserializedDeclsDumper : public DelegatingDeserializationListener {
public:
@@ -262,15 +268,22 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI,
return nullptr;
std::vector<std::unique_ptr<ASTConsumer>> Consumers;
- llvm::StringRef DumpDeserializedDeclarationRangesPath = CI.getFrontendOpts().DumpDeserializedDeclarationRangesPath;
+ llvm::StringRef DumpDeserializedDeclarationRangesPath =
+ CI.getFrontendOpts().DumpDeserializedDeclarationRangesPath;
if (!DumpDeserializedDeclarationRangesPath.empty()) {
std::error_code ErrorCode;
- auto FileStream = std::make_unique<llvm::raw_fd_ostream>(DumpDeserializedDeclarationRangesPath, ErrorCode, llvm::sys::fs::OF_None);
+ auto FileStream = std::make_unique<llvm::raw_fd_ostream>(
+ DumpDeserializedDeclarationRangesPath, ErrorCode,
+ llvm::sys::fs::OF_None);
if (!ErrorCode) {
- auto Printer = std::make_unique<DeserializedDeclsLineRangePrinter>(CI.getSourceManager(), std::move(FileStream));
+ auto Printer = std::make_unique<DeserializedDeclsLineRangePrinter>(
+ CI.getSourceManager(), std::move(FileStream));
Consumers.push_back(std::move(Printer));
} else {
- llvm::errs() << "Failed to create output file for -dump-deserialized-declaration-ranges flag, file path: " << DumpDeserializedDeclarationRangesPath << ", error: " << ErrorCode.message() << "\n";
+ llvm::errs() << "Failed to create output file for "
+ "-dump-deserialized-declaration-ranges flag, file path: "
+ << DumpDeserializedDeclarationRangesPath
+ << ", error: " << ErrorCode.message() << "\n";
}
}
@@ -340,7 +353,7 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI,
}
assert(Consumers.size() >= 1 && "should have added the main consumer");
- if (Consumers.size() == 1)
+ if (Consumers.size() == 1)
return std::move(Consumers.front());
return std::make_unique<MultiplexConsumer>(std::move(Consumers));
}
More information about the cfe-commits
mailing list