[clang] [WIP] Implement `-dump-deserialized-declaration-ranges` flag. (PR #133910)
Viktoriia Bakalova via cfe-commits
cfe-commits at lists.llvm.org
Fri Apr 4 08:45:42 PDT 2025
https://github.com/VitaNuo updated https://github.com/llvm/llvm-project/pull/133910
>From cfa057b4d43ebe7f94ccd4f387a94359beaa29b2 Mon Sep 17 00:00:00 2001
From: Viktoriia Bakalova <bakalova at google.com>
Date: Fri, 4 Apr 2025 17:45:24 +0200
Subject: [PATCH] This commit implements a CC1 flag
`-dump-deserialized-declaration-ranges`. The flag takes a file path to which
the ranges of declarations deserialized by `ASTReader` are dumped. Example
usage:
```
clang -Xclang=-dump-deserialized-declaration-ranges=/tmp/decls -c file.cc -o file.o
```
Example output:
```
// /tmp/decls
{
"required_ranges": [
{
"file": "foo.h",
"range": [
{
"from": {
"line": 26,
"column": 1
},
"to": {
"line": 27,
"column": 77
}
}
]
},
{
"file": "bar.h",
"range": [
{
"from": {
"line": 30,
"column": 1
},
"to": {
"line": 35,
"column": 1
}
},
{
"from": {
"line": 92,
"column": 1
},
"to": {
"line": 95,
"column": 1
}
}
]
}
]
}
```
Specifying the flag creates an instance of `DeserializedDeclsLineRangePrinter`, which dumps ranges of deserialized declarations to aid debugging and bug minimization.
Required ranges are computed from source ranges of Decls. `TranslationUnitDecl`, `LinkageSpecDecl` and `NamespaceDecl` are ignored for the sake of this PR.
Technical details:
* `DeserializedDeclsLineRangePrinter` implements `ASTConsumer` and `ASTDeserializationListener`, so that an object of `DeserializedDeclsLineRangePrinter` registers as its own listener.
* `ASTDeserializationListener` interface provides the `DeclRead` callback that we use to collect the deserialized Decls.
Printing or otherwise processing them at this point is dangerous, since that could trigger additional deserialization and crash compilation.
* The collected Decls are processed in the `HandleTranslationUnit` method of `ASTConsumer`. This is a safe point, since we know that by this point all the Decls needed by the compiler frontend have been deserialized.
* In case our processing causes further deserialization, `DeclRead` from the listener might be called again. However, at that point we don't accept any more Decls for processing.
---
clang/include/clang/Driver/Options.td | 4 +
.../include/clang/Frontend/FrontendOptions.h | 3 +
clang/lib/Frontend/FrontendAction.cpp | 190 +++++++++++++++++-
.../dump-deserialized-declaration-ranges.cpp | 118 +++++++++++
4 files changed, 310 insertions(+), 5 deletions(-)
create mode 100644 clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 3af072242d039..1737e40b776e1 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -7968,6 +7968,10 @@ def print_dependency_directives_minimized_source : Flag<["-"],
"print-dependency-directives-minimized-source">,
HelpText<"Print the output of the dependency directives source minimizer">;
}
+def dump_deserialized_declaration_ranges : Joined<["-"],
+ "dump-deserialized-declaration-ranges=">,
+ HelpText<"Dump ranges of deserialized declarations to aid debugging and minimization">,
+ MarshallingInfoString<FrontendOpts<"DumpDeserializedDeclarationRangesPath">>;
defm emit_llvm_uselists : BoolOption<"", "emit-llvm-uselists",
CodeGenOpts<"EmitLLVMUseLists">, DefaultFalse,
diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h
index a9c9849ff52ab..8ef9ce9db8783 100644
--- a/clang/include/clang/Frontend/FrontendOptions.h
+++ b/clang/include/clang/Frontend/FrontendOptions.h
@@ -530,6 +530,9 @@ class FrontendOptions {
/// Output Path for module output file.
std::string ModuleOutputPath;
+ /// Output path to dump ranges of deserialized declarations.
+ std::string DumpDeserializedDeclarationRangesPath;
+
public:
FrontendOptions()
: DisableFree(false), RelocatablePCH(false), ShowHelp(false),
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 2d77f06be7446..1f939f7722d19 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -15,6 +15,8 @@
#include "clang/Basic/FileEntry.h"
#include "clang/Basic/LangStandard.h"
#include "clang/Basic/Sarif.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Stack.h"
#include "clang/Frontend/ASTUnit.h"
#include "clang/Frontend/CompilerInstance.h"
@@ -35,6 +37,7 @@
#include "clang/Serialization/ASTReader.h"
#include "clang/Serialization/GlobalModuleIndex.h"
#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/BuryPointer.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
@@ -49,6 +52,166 @@ LLVM_INSTANTIATE_REGISTRY(FrontendPluginRegistry)
namespace {
+/// DeserializedDeclsLineRangePrinter dumps ranges of deserialized declarations
+/// to aid debugging and bug minimization. It implements ASTConsumer and
+/// ASTDeserializationListener, so that an object of
+/// DeserializedDeclsLineRangePrinter registers as its own listener. The
+/// ASTDeserializationListener interface provides the DeclRead callback that we
+/// use to collect the deserialized Decls. Note that printing or otherwise
+/// processing them at this point is dangerous, since that could trigger
+/// additional deserialization and crash compilation. Therefore, we process the
+/// collected Decls in the HandleTranslationUnit method of ASTConsumer. This is a
+/// safe point, since we know that by this point all the Decls needed by the
+/// compiler frontend have been deserialized. In case our processing causes
+/// further deserialization, DeclRead from the listener might be called again.
+/// However, at that point we don't accept any more Decls for processing.
+class DeserializedDeclsLineRangePrinter : public ASTConsumer,
+ ASTDeserializationListener {
+public:
+ explicit DeserializedDeclsLineRangePrinter(
+ SourceManager &SM, std::unique_ptr<llvm::raw_fd_ostream> OS)
+ : ASTDeserializationListener(), SM(SM), OS(std::move(OS)) {}
+
+ ASTDeserializationListener *GetASTDeserializationListener() override {
+ return this; // This object acts as its own deserialization listener.
+ }
+
+ void DeclRead(GlobalDeclID ID, const Decl *D) override {
+ if (!IsCollectingDecls) // Cleared by HandleTranslationUnit; later Decls are dropped.
+ return;
+ if (!D || isa<TranslationUnitDecl>(D) || isa<LinkageSpecDecl>(D) ||
+ isa<NamespaceDecl>(D)) {
+ // These decls cover a lot of nested declarations that might not be used,
+ // reducing the granularity and making the output less useful.
+ return;
+ }
+ if (auto *DC = D->getDeclContext(); !DC || !DC->isFileContext()) {
+ // We choose to work at namespace level to reduce complexity and the
+ // number of cases we care about.
+ return;
+ }
+ PendingDecls.push_back(D);
+ }
+
+ struct Position { // A (line, column) pair, ordered by line and then by column.
+ unsigned Line;
+ unsigned Column;
+
+ bool operator<(const Position &other) const {
+ if (Line < other.Line)
+ return true;
+ if (Line > other.Line)
+ return false;
+ return Column < other.Column;
+ }
+
+ static Position GetSpelling(const SourceManager &SM,
+ const SourceLocation &SL) {
+ return {SM.getSpellingLineNumber(SL), SM.getSpellingColumnNumber(SL)};
+ }
+ };
+
+ struct RequiredRanges { // The merged (from, to) ranges to print for one file.
+ StringRef Filename;
+ std::vector<std::pair<Position, Position>> FromTo;
+ };
+ void HandleTranslationUnit(ASTContext &Context) override {
+ assert(IsCollectingDecls && "HandleTranslationUnit called twice?");
+ IsCollectingDecls = false;
+
+ // Group the expansion ranges of the collected Decls by the file they are in.
+ struct FileData {
+ std::vector<std::pair<Position, Position>> FromTo;
+ OptionalFileEntryRef Ref;
+ };
+ llvm::DenseMap<const FileEntry *, FileData> FileToLines; // NOTE(review): iterated below; DenseMap order over pointer keys is presumably not stable across runs, so the file order in the output may vary - confirm.
+ for (const Decl *D : PendingDecls) {
+ CharSourceRange R = SM.getExpansionRange(D->getSourceRange());
+ if (!R.isValid())
+ continue;
+
+ auto *F = SM.getFileEntryForID(SM.getFileID(R.getBegin()));
+ if (F != SM.getFileEntryForID(SM.getFileID(R.getEnd()))) {
+ // Begin and end are in different files; such cases are rare and difficult to handle, so skip them.
+ continue;
+ }
+
+ auto &Data = FileToLines[F];
+ if (!Data.Ref)
+ Data.Ref = SM.getFileEntryRefForID(SM.getFileID(R.getBegin()));
+ Data.FromTo.push_back({Position::GetSpelling(SM, R.getBegin()),
+ Position::GetSpelling(SM, R.getEnd())});
+ }
+
+ // To simplify output, merge ranges that overlap or touch within each file.
+ std::vector<RequiredRanges> Result;
+ for (auto &[F, Data] : FileToLines) {
+ auto &FromTo = Data.FromTo;
+ assert(!FromTo.empty());
+
+ if (!Data.Ref)
+ continue;
+
+ llvm::sort(FromTo); // Orders by start position, then by end position.
+
+ std::vector<std::pair<Position, Position>> MergedRanges;
+ MergedRanges.push_back(FromTo.front());
+ for (auto It = FromTo.begin() + 1; It < FromTo.end(); ++It) {
+ if (MergedRanges.back().second < It->first) { // Starts after the last merged range ends: begin a new range.
+ MergedRanges.push_back(*It);
+ continue;
+ }
+ if (MergedRanges.back().second < It->second) // Otherwise extend the last merged range when this one ends later.
+ MergedRanges.back().second = It->second;
+ }
+ Result.push_back({Data.Ref->getName(), MergedRanges});
+ }
+ printJson(Result);
+ }
+
+private:
+ std::vector<const Decl *> PendingDecls;
+ bool IsCollectingDecls = true;
+ const SourceManager &SM;
+ std::unique_ptr<llvm::raw_ostream> OS;
+
+ void printJson(llvm::ArrayRef<RequiredRanges> Result) { // Writes Result to *OS as hand-formatted JSON; the file name is emitted without escaping.
+ *OS << "{\n";
+ *OS << R"( "required_ranges": [)" << "\n";
+ for (size_t I = 0; I < Result.size(); ++I) {
+ auto &F = Result[I].Filename;
+ auto &MergedRanges = Result[I].FromTo;
+ *OS << R"( {)" << "\n";
+ *OS << R"( "file": ")" << F << "\"," << "\n";
+ *OS << R"( "range": [)" << "\n";
+ for (size_t J = 0; J < MergedRanges.size(); ++J) {
+ auto &From = MergedRanges[J].first;
+ auto &To = MergedRanges[J].second;
+ *OS << R"( {)" << "\n";
+ *OS << R"( "from": {)" << "\n";
+ *OS << R"( "line": )" << From.Line << ",\n";
+ *OS << R"( "column": )" << From.Column << "\n"
+ << R"( },)" << "\n";
+ *OS << R"( "to": {)" << "\n";
+ *OS << R"( "line": )" << To.Line << ",\n";
+ *OS << R"( "column": )" << To.Column << "\n"
+ << R"( })" << "\n";
+ *OS << R"( })";
+ if (J < MergedRanges.size() - 1) {
+ *OS << ",";
+ }
+ *OS << "\n";
+ }
+ *OS << " ]" << "\n" << " }";
+ if (I < Result.size() - 1)
+ *OS << ",";
+ *OS << "\n";
+ }
+ *OS << " ]\n";
+ *OS << "}\n";
+ }
+};
+
/// Dumps deserialized declarations.
class DeserializedDeclsDumper : public DelegatingDeserializationListener {
public:
@@ -121,6 +284,25 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI,
if (!Consumer)
return nullptr;
+ std::vector<std::unique_ptr<ASTConsumer>> Consumers;
+ llvm::StringRef DumpDeserializedDeclarationRangesPath =
+ CI.getFrontendOpts().DumpDeserializedDeclarationRangesPath;
+ if (!DumpDeserializedDeclarationRangesPath.empty()) {
+ std::error_code ErrorCode;
+ auto FileStream = std::make_unique<llvm::raw_fd_ostream>(
+ DumpDeserializedDeclarationRangesPath, ErrorCode,
+ llvm::sys::fs::OF_None);
+ if (!ErrorCode) {
+ Consumers.push_back(std::make_unique<DeserializedDeclsLineRangePrinter>(
+ CI.getSourceManager(), std::move(FileStream)));
+ } else {
+ llvm::errs() << "Failed to create output file for "
+ "-dump-deserialized-declaration-ranges flag, file path: "
+ << DumpDeserializedDeclarationRangesPath
+ << ", error: " << ErrorCode.message() << "\n";
+ }
+ }
+
// Validate -add-plugin args.
bool FoundAllPlugins = true;
for (const std::string &Arg : CI.getFrontendOpts().AddPluginActions) {
@@ -138,17 +320,12 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI,
if (!FoundAllPlugins)
return nullptr;
- // If there are no registered plugins we don't need to wrap the consumer
- if (FrontendPluginRegistry::begin() == FrontendPluginRegistry::end())
- return Consumer;
-
// If this is a code completion run, avoid invoking the plugin consumers
if (CI.hasCodeCompletionConsumer())
return Consumer;
// Collect the list of plugins that go before the main action (in Consumers)
// or after it (in AfterConsumers)
- std::vector<std::unique_ptr<ASTConsumer>> Consumers;
std::vector<std::unique_ptr<ASTConsumer>> AfterConsumers;
for (const FrontendPluginRegistry::entry &Plugin :
FrontendPluginRegistry::entries()) {
@@ -191,6 +368,9 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI,
Consumers.push_back(std::move(C));
}
+ assert(Consumers.size() >= 1 && "should have added the main consumer");
+ if (Consumers.size() == 1)
+ return std::move(Consumers.front());
return std::make_unique<MultiplexConsumer>(std::move(Consumers));
}
diff --git a/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
new file mode 100644
index 0000000000000..c72fc32479b0f
--- /dev/null
+++ b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
@@ -0,0 +1,118 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+// RUN: %clang_cc1 -xc++ -fmodules -fmodule-name=foo -fmodule-map-file=%t/foo.cppmap -emit-module %t/foo.cppmap -o %t/foo.pcm
+// RUN: %clang_cc1 -xc++ -fmodules -dump-deserialized-declaration-ranges=%t/decls -fmodule-file=%t/foo.pcm %t/foo.cpp -o %t/foo.o
+// RUN: cat %t/decls | FileCheck -check-prefix=RANGE %s
+// RANGE:{
+// RANGE-NEXT: "required_ranges": [
+// RANGE-NEXT: {
+// RANGE-NEXT: "file": "{{.+}}/foo.h",
+// RANGE-NEXT: "range": [
+// RANGE-NEXT: {
+// RANGE-NEXT: "from": {
+// RANGE-NEXT: "line": 1,
+// RANGE-NEXT: "column": 1
+// RANGE-NEXT: },
+// RANGE-NEXT: "to": {
+// RANGE-NEXT: "line": 9,
+// RANGE-NEXT: "column": 1
+// RANGE-NEXT: }
+// RANGE-NEXT: },
+// RANGE-NEXT: {
+// RANGE-NEXT: "from": {
+// RANGE-NEXT: "line": 11,
+// RANGE-NEXT: "column": 1
+// RANGE-NEXT: },
+// RANGE-NEXT: "to": {
+// RANGE-NEXT: "line": 11,
+// RANGE-NEXT: "column": 12
+// RANGE-NEXT: }
+// RANGE-NEXT: },
+// RANGE-NEXT: {
+// RANGE-NEXT: "from": {
+// RANGE-NEXT: "line": 13,
+// RANGE-NEXT: "column": 1
+// RANGE-NEXT: },
+// RANGE-NEXT: "to": {
+// RANGE-NEXT: "line": 15,
+// RANGE-NEXT: "column": 1
+// RANGE-NEXT: }
+// RANGE-NEXT: }
+// RANGE-NEXT: ]
+// RANGE-NEXT: }
+// RANGE-NEXT: ]
+// RANGE-NEXT:}
+// RUN: echo -e '{\n\
+// RUN: "required_ranges": [\n\
+// RUN: {\n\
+// RUN: "file": "%t/foo.h",\n\
+// RUN: "range": [\n\
+// RUN: {\n\
+// RUN: "from": {\n\
+// RUN: "line": 1,\n\
+// RUN: "column": 1\n\
+// RUN: },\n\
+// RUN: "to": {\n\
+// RUN: "line": 9,\n\
+// RUN: "column": 1\n\
+// RUN: }\n\
+// RUN: },\n\
+// RUN: {\n\
+// RUN: "from": {\n\
+// RUN: "line": 11,\n\
+// RUN: "column": 1\n\
+// RUN: },\n\
+// RUN: "to": {\n\
+// RUN: "line": 11,\n\
+// RUN: "column": 12\n\
+// RUN: }\n\
+// RUN: },\n\
+// RUN: {\n\
+// RUN: "from": {\n\
+// RUN: "line": 13,\n\
+// RUN: "column": 1\n\
+// RUN: },\n\
+// RUN: "to": {\n\
+// RUN: "line": 15,\n\
+// RUN: "column": 1\n\
+// RUN: }\n\
+// RUN: }\n\
+// RUN: ]\n\
+// RUN: }\n\
+// RUN: ]\n\
+// RUN:}' > %t/expected_decls
+// RUN: diff %t/decls %t/expected_decls
+
+//--- foo.cppmap
+module foo {
+ header "foo.h"
+ export *
+}
+
+//--- foo.h
+class MyData {
+public:
+ MyData(int val): value_(val) {}
+ int getValue() const {
+ return 5;
+ }
+private:
+ int value_;
+};
+
+extern int global_value;
+
+int multiply(int a, int b) {
+ return a * b;
+}
+
+//--- foo.cpp
+#include "foo.h"
+int global_value = 5;
+int main() {
+ MyData data(5);
+ int current_value = data.getValue();
+ int doubled_value = multiply(current_value, 2);
+ int final_result = doubled_value + global_value;
+}
More information about the cfe-commits
mailing list