[clang] Implement `-dump-minimization-hints` flag. (PR #133910)
Viktoriia Bakalova via cfe-commits
cfe-commits at lists.llvm.org
Wed Apr 9 08:52:53 PDT 2025
https://github.com/VitaNuo updated https://github.com/llvm/llvm-project/pull/133910
>From cfa057b4d43ebe7f94ccd4f387a94359beaa29b2 Mon Sep 17 00:00:00 2001
From: Viktoriia Bakalova <bakalova at google.com>
Date: Fri, 4 Apr 2025 17:45:24 +0200
Subject: [PATCH 01/10] This commit implements a CC1 flag
`-dump-deserialized-declaration-ranges`. The flag allows specifying a file
path to dump ranges of deserialized declarations in `ASTReader`. Example
usage:
```
clang -Xclang=-dump-deserialized-declaration-ranges=/tmp/decls -c file.cc -o file.o
```
Example output:
```
// /tmp/decls
{
"required_ranges": [
{
"file": "foo.h",
"range": [
{
"from": {
"line": 26,
"column": 1
},
"to": {
"line": 27,
"column": 77
}
}
]
},
{
"file": "bar.h",
"range": [
{
"from": {
"line": 30,
"column": 1
},
"to": {
"line": 35,
"column": 1
}
},
{
"from": {
"line": 92,
"column": 1
},
"to": {
"line": 95,
"column": 1
}
}
]
}
]
}
```
Specifying the flag creates an instance of `DeserializedDeclsLineRangePrinter`, which dumps ranges of deserialized declarations to aid debugging and bug minimization.
Required ranges are computed from source ranges of Decls. `TranslationUnitDecl`, `LinkageSpecDecl` and `NamespaceDecl` are ignored for the sake of this PR.
Technical details:
* `DeserializedDeclsLineRangePrinter` implements `ASTConsumer` and `ASTDeserializationListener`, so that an object of `DeserializedDeclsLineRangePrinter` registers as its own listener.
* `ASTDeserializationListener` interface provides the `DeclRead` callback that we use to collect the deserialized Decls.
Printing or otherwise processing them at this point is dangerous, since that could trigger additional deserialization and crash compilation.
* The collected Decls are processed in `HandleTranslationUnit` method of `ASTConsumer`. This is a safe point, since we know that by this point all the Decls needed by the compiler frontend have been deserialized.
* In case our processing causes further deserialization, `DeclRead` from the listener might be called again. However, at that point we don't accept any more Decls for processing.
---
clang/include/clang/Driver/Options.td | 4 +
.../include/clang/Frontend/FrontendOptions.h | 3 +
clang/lib/Frontend/FrontendAction.cpp | 190 +++++++++++++++++-
.../dump-deserialized-declaration-ranges.cpp | 118 +++++++++++
4 files changed, 310 insertions(+), 5 deletions(-)
create mode 100644 clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 3af072242d039..1737e40b776e1 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -7968,6 +7968,10 @@ def print_dependency_directives_minimized_source : Flag<["-"],
"print-dependency-directives-minimized-source">,
HelpText<"Print the output of the dependency directives source minimizer">;
}
+def dump_deserialized_declaration_ranges : Joined<["-"],
+ "dump-deserialized-declaration-ranges=">,
+ HelpText<"Dump ranges of deserialized declarations to aid debugging and minimization">,
+ MarshallingInfoString<FrontendOpts<"DumpDeserializedDeclarationRangesPath">>;
defm emit_llvm_uselists : BoolOption<"", "emit-llvm-uselists",
CodeGenOpts<"EmitLLVMUseLists">, DefaultFalse,
diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h
index a9c9849ff52ab..8ef9ce9db8783 100644
--- a/clang/include/clang/Frontend/FrontendOptions.h
+++ b/clang/include/clang/Frontend/FrontendOptions.h
@@ -530,6 +530,9 @@ class FrontendOptions {
/// Output Path for module output file.
std::string ModuleOutputPath;
+ /// Output path to dump ranges of deserialized declarations.
+ std::string DumpDeserializedDeclarationRangesPath;
+
public:
FrontendOptions()
: DisableFree(false), RelocatablePCH(false), ShowHelp(false),
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 2d77f06be7446..1f939f7722d19 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -15,6 +15,8 @@
#include "clang/Basic/FileEntry.h"
#include "clang/Basic/LangStandard.h"
#include "clang/Basic/Sarif.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Stack.h"
#include "clang/Frontend/ASTUnit.h"
#include "clang/Frontend/CompilerInstance.h"
@@ -35,6 +37,7 @@
#include "clang/Serialization/ASTReader.h"
#include "clang/Serialization/GlobalModuleIndex.h"
#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/BuryPointer.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
@@ -49,6 +52,166 @@ LLVM_INSTANTIATE_REGISTRY(FrontendPluginRegistry)
namespace {
+/// DeserializedDeclsLineRangePrinter dumps ranges of deserialized declarations
+/// to aid debugging and bug minimization. It implements ASTConsumer and
+/// ASTDeserializationListener, so that an object of
+/// DeserializedDeclsLineRangePrinter registers as its own listener. The
+/// ASTDeserializationListener interface provides the DeclRead callback that we
+/// use to collect the deserialized Decls. Note that printing or otherwise
+/// processing them at this point is dangerous, since that could trigger
+/// additional deserialization and crash compilation. Therefore, we process the
+/// collected Decls in HandleTranslationUnit method of ASTConsumer. This is a
+/// safe point, since we know that by this point all the Decls needed by the
+/// compiler frontend have been deserialized. In case our processing causes
+/// further deserialization, DeclRead from the listener might be called again.
+/// However, at that point we don't accept any more Decls for processing.
+class DeserializedDeclsLineRangePrinter : public ASTConsumer,
+ ASTDeserializationListener {
+public:
+ explicit DeserializedDeclsLineRangePrinter(
+ SourceManager &SM, std::unique_ptr<llvm::raw_fd_ostream> OS)
+ : ASTDeserializationListener(), SM(SM), OS(std::move(OS)) {}
+
+ ASTDeserializationListener *GetASTDeserializationListener() override {
+ return this;
+ }
+
+ void DeclRead(GlobalDeclID ID, const Decl *D) override {
+ if (!IsCollectingDecls)
+ return;
+ if (!D || isa<TranslationUnitDecl>(D) || isa<LinkageSpecDecl>(D) ||
+ isa<NamespaceDecl>(D)) {
+ // These decls cover a lot of nested declarations that might not be used,
+ // reducing the granularity and making the output less useful.
+ return;
+ }
+ if (auto *DC = D->getDeclContext(); !DC || !DC->isFileContext()) {
+ // We choose to work at namespace level to reduce complexity and the
+ // number of cases we care about.
+ return;
+ }
+ PendingDecls.push_back(D);
+ }
+
+ struct Position {
+ unsigned Line;
+ unsigned Column;
+
+ bool operator<(const Position &other) const {
+ if (Line < other.Line)
+ return true;
+ if (Line > other.Line)
+ return false;
+ return Column < other.Column;
+ }
+
+ static Position GetSpelling(const SourceManager &SM,
+ const SourceLocation &SL) {
+ return {SM.getSpellingLineNumber(SL), SM.getSpellingColumnNumber(SL)};
+ }
+ };
+
+ struct RequiredRanges {
+ StringRef Filename;
+ std::vector<std::pair<Position, Position>> FromTo;
+ };
+ void HandleTranslationUnit(ASTContext &Context) override {
+ assert(IsCollectingDecls && "HandleTranslationUnit called twice?");
+ IsCollectingDecls = false;
+
+ // Merge ranges in each of the files.
+ struct FileData {
+ std::vector<std::pair<Position, Position>> FromTo;
+ OptionalFileEntryRef Ref;
+ };
+ llvm::DenseMap<const FileEntry *, FileData> FileToLines;
+ for (const Decl *D : PendingDecls) {
+ CharSourceRange R = SM.getExpansionRange(D->getSourceRange());
+ if (!R.isValid())
+ continue;
+
+ auto *F = SM.getFileEntryForID(SM.getFileID(R.getBegin()));
+ if (F != SM.getFileEntryForID(SM.getFileID(R.getEnd()))) {
+ // Such cases are rare and difficult to handle.
+ continue;
+ }
+
+ auto &Data = FileToLines[F];
+ if (!Data.Ref)
+ Data.Ref = SM.getFileEntryRefForID(SM.getFileID(R.getBegin()));
+ Data.FromTo.push_back({Position::GetSpelling(SM, R.getBegin()),
+ Position::GetSpelling(SM, R.getEnd())});
+ }
+
+ // To simplify output, merge consecutive and intersecting ranges.
+ std::vector<RequiredRanges> Result;
+ for (auto &[F, Data] : FileToLines) {
+ auto &FromTo = Data.FromTo;
+ assert(!FromTo.empty());
+
+ if (!Data.Ref)
+ continue;
+
+ llvm::sort(FromTo);
+
+ std::vector<std::pair<Position, Position>> MergedRanges;
+ MergedRanges.push_back(FromTo.front());
+ for (auto It = FromTo.begin() + 1; It < FromTo.end(); ++It) {
+ if (MergedRanges.back().second < It->first) {
+ MergedRanges.push_back(*It);
+ continue;
+ }
+ if (MergedRanges.back().second < It->second)
+ MergedRanges.back().second = It->second;
+ }
+ Result.push_back({Data.Ref->getName(), MergedRanges});
+ }
+ printJson(Result);
+ }
+
+private:
+ std::vector<const Decl *> PendingDecls;
+ bool IsCollectingDecls = true;
+ const SourceManager &SM;
+ std::unique_ptr<llvm::raw_ostream> OS;
+
+ void printJson(llvm::ArrayRef<RequiredRanges> Result) {
+ *OS << "{\n";
+ *OS << R"( "required_ranges": [)" << "\n";
+ for (size_t I = 0; I < Result.size(); ++I) {
+ auto &F = Result[I].Filename;
+ auto &MergedRanges = Result[I].FromTo;
+ *OS << R"( {)" << "\n";
+ *OS << R"( "file": ")" << F << "\"," << "\n";
+ *OS << R"( "range": [)" << "\n";
+ for (size_t J = 0; J < MergedRanges.size(); ++J) {
+ auto &From = MergedRanges[J].first;
+ auto &To = MergedRanges[J].second;
+ *OS << R"( {)" << "\n";
+ *OS << R"( "from": {)" << "\n";
+ *OS << R"( "line": )" << From.Line << ",\n";
+ *OS << R"( "column": )" << From.Column << "\n"
+ << R"( },)" << "\n";
+ *OS << R"( "to": {)" << "\n";
+ *OS << R"( "line": )" << To.Line << ",\n";
+ *OS << R"( "column": )" << To.Column << "\n"
+ << R"( })" << "\n";
+ *OS << R"( })";
+ if (J < MergedRanges.size() - 1) {
+ *OS << ",";
+ }
+ *OS << "\n";
+ }
+ *OS << " ]" << "\n" << " }";
+ if (I < Result.size() - 1)
+ *OS << ",";
+ *OS << "\n";
+ }
+ *OS << " ]\n";
+ *OS << "}\n";
+ }
+};
+
/// Dumps deserialized declarations.
class DeserializedDeclsDumper : public DelegatingDeserializationListener {
public:
@@ -121,6 +284,25 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI,
if (!Consumer)
return nullptr;
+ std::vector<std::unique_ptr<ASTConsumer>> Consumers;
+ llvm::StringRef DumpDeserializedDeclarationRangesPath =
+ CI.getFrontendOpts().DumpDeserializedDeclarationRangesPath;
+ if (!DumpDeserializedDeclarationRangesPath.empty()) {
+ std::error_code ErrorCode;
+ auto FileStream = std::make_unique<llvm::raw_fd_ostream>(
+ DumpDeserializedDeclarationRangesPath, ErrorCode,
+ llvm::sys::fs::OF_None);
+ if (!ErrorCode) {
+ Consumers.push_back(std::make_unique<DeserializedDeclsLineRangePrinter>(
+ CI.getSourceManager(), std::move(FileStream)));
+ } else {
+ llvm::errs() << "Failed to create output file for "
+ "-dump-deserialized-declaration-ranges flag, file path: "
+ << DumpDeserializedDeclarationRangesPath
+ << ", error: " << ErrorCode.message() << "\n";
+ }
+ }
+
// Validate -add-plugin args.
bool FoundAllPlugins = true;
for (const std::string &Arg : CI.getFrontendOpts().AddPluginActions) {
@@ -138,17 +320,12 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI,
if (!FoundAllPlugins)
return nullptr;
- // If there are no registered plugins we don't need to wrap the consumer
- if (FrontendPluginRegistry::begin() == FrontendPluginRegistry::end())
- return Consumer;
-
// If this is a code completion run, avoid invoking the plugin consumers
if (CI.hasCodeCompletionConsumer())
return Consumer;
// Collect the list of plugins that go before the main action (in Consumers)
// or after it (in AfterConsumers)
- std::vector<std::unique_ptr<ASTConsumer>> Consumers;
std::vector<std::unique_ptr<ASTConsumer>> AfterConsumers;
for (const FrontendPluginRegistry::entry &Plugin :
FrontendPluginRegistry::entries()) {
@@ -191,6 +368,9 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI,
Consumers.push_back(std::move(C));
}
+ assert(Consumers.size() >= 1 && "should have added the main consumer");
+ if (Consumers.size() == 1)
+ return std::move(Consumers.front());
return std::make_unique<MultiplexConsumer>(std::move(Consumers));
}
diff --git a/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
new file mode 100644
index 0000000000000..c72fc32479b0f
--- /dev/null
+++ b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
@@ -0,0 +1,118 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+// RUN: %clang_cc1 -xc++ -fmodules -fmodule-name=foo -fmodule-map-file=%t/foo.cppmap -emit-module %t/foo.cppmap -o %t/foo.pcm
+// RUN: %clang_cc1 -xc++ -fmodules -dump-deserialized-declaration-ranges=%t/decls -fmodule-file=%t/foo.pcm %t/foo.cpp -o %t/foo.o
+// RUN: cat %t/decls | FileCheck -check-prefix=RANGE %s
+// RANGE:{
+// RANGE-NEXT: "required_ranges": [
+// RANGE-NEXT: {
+// RANGE-NEXT: "file": "{{.+}}/foo.h",
+// RANGE-NEXT: "range": [
+// RANGE-NEXT: {
+// RANGE-NEXT: "from": {
+// RANGE-NEXT: "line": 1,
+// RANGE-NEXT: "column": 1
+// RANGE-NEXT: },
+// RANGE-NEXT: "to": {
+// RANGE-NEXT: "line": 9,
+// RANGE-NEXT: "column": 1
+// RANGE-NEXT: }
+// RANGE-NEXT: },
+// RANGE-NEXT: {
+// RANGE-NEXT: "from": {
+// RANGE-NEXT: "line": 11,
+// RANGE-NEXT: "column": 1
+// RANGE-NEXT: },
+// RANGE-NEXT: "to": {
+// RANGE-NEXT: "line": 11,
+// RANGE-NEXT: "column": 12
+// RANGE-NEXT: }
+// RANGE-NEXT: },
+// RANGE-NEXT: {
+// RANGE-NEXT: "from": {
+// RANGE-NEXT: "line": 13,
+// RANGE-NEXT: "column": 1
+// RANGE-NEXT: },
+// RANGE-NEXT: "to": {
+// RANGE-NEXT: "line": 15,
+// RANGE-NEXT: "column": 1
+// RANGE-NEXT: }
+// RANGE-NEXT: }
+// RANGE-NEXT: ]
+// RANGE-NEXT: }
+// RANGE-NEXT: ]
+// RANGE-NEXT:}
+// RUN: echo -e '{\n\
+// RUN: "required_ranges": [\n\
+// RUN: {\n\
+// RUN: "file": "%t/foo.h",\n\
+// RUN: "range": [\n\
+// RUN: {\n\
+// RUN: "from": {\n\
+// RUN: "line": 1,\n\
+// RUN: "column": 1\n\
+// RUN: },\n\
+// RUN: "to": {\n\
+// RUN: "line": 9,\n\
+// RUN: "column": 1\n\
+// RUN: }\n\
+// RUN: },\n\
+// RUN: {\n\
+// RUN: "from": {\n\
+// RUN: "line": 11,\n\
+// RUN: "column": 1\n\
+// RUN: },\n\
+// RUN: "to": {\n\
+// RUN: "line": 11,\n\
+// RUN: "column": 12\n\
+// RUN: }\n\
+// RUN: },\n\
+// RUN: {\n\
+// RUN: "from": {\n\
+// RUN: "line": 13,\n\
+// RUN: "column": 1\n\
+// RUN: },\n\
+// RUN: "to": {\n\
+// RUN: "line": 15,\n\
+// RUN: "column": 1\n\
+// RUN: }\n\
+// RUN: }\n\
+// RUN: ]\n\
+// RUN: }\n\
+// RUN: ]\n\
+// RUN:}' > %t/expected_decls
+// RUN: diff %t/decls %t/expected_decls
+
+//--- foo.cppmap
+module foo {
+ header "foo.h"
+ export *
+}
+
+//--- foo.h
+class MyData {
+public:
+ MyData(int val): value_(val) {}
+ int getValue() const {
+ return 5;
+ }
+private:
+ int value_;
+};
+
+extern int global_value;
+
+int multiply(int a, int b) {
+ return a * b;
+}
+
+//--- foo.cpp
+#include "foo.h"
+int global_value = 5;
+int main() {
+ MyData data(5);
+ int current_value = data.getValue();
+ int doubled_value = multiply(current_value, 2);
+ int final_result = doubled_value + global_value;
+}
>From 221d1dffe9229ce6a11ac1ade9b072ed6a3e000a Mon Sep 17 00:00:00 2001
From: Viktoriia Bakalova <bakalova at google.com>
Date: Tue, 8 Apr 2025 09:26:55 +0200
Subject: [PATCH 02/10] Fix test on Windows
---
clang/test/Frontend/dump-deserialized-declaration-ranges.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
index c72fc32479b0f..9de2962e11880 100644
--- a/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
+++ b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
@@ -7,7 +7,7 @@
// RANGE:{
// RANGE-NEXT: "required_ranges": [
// RANGE-NEXT: {
-// RANGE-NEXT: "file": "{{.+}}/foo.h",
+// RANGE-NEXT: "file": "{{.+}}foo.h",
// RANGE-NEXT: "range": [
// RANGE-NEXT: {
// RANGE-NEXT: "from": {
>From 1525fe80da2c242caeb32f65c0398403dfab9362 Mon Sep 17 00:00:00 2001
From: Viktoriia Bakalova <bakalova at google.com>
Date: Tue, 8 Apr 2025 09:32:42 +0200
Subject: [PATCH 03/10] Remove old test.
---
.../dump-deserialized-declaration-ranges.cpp | 40 -------------------
1 file changed, 40 deletions(-)
diff --git a/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
index 9de2962e11880..8fbbef49388a1 100644
--- a/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
+++ b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
@@ -43,46 +43,6 @@
// RANGE-NEXT: }
// RANGE-NEXT: ]
// RANGE-NEXT:}
-// RUN: echo -e '{\n\
-// RUN: "required_ranges": [\n\
-// RUN: {\n\
-// RUN: "file": "%t/foo.h",\n\
-// RUN: "range": [\n\
-// RUN: {\n\
-// RUN: "from": {\n\
-// RUN: "line": 1,\n\
-// RUN: "column": 1\n\
-// RUN: },\n\
-// RUN: "to": {\n\
-// RUN: "line": 9,\n\
-// RUN: "column": 1\n\
-// RUN: }\n\
-// RUN: },\n\
-// RUN: {\n\
-// RUN: "from": {\n\
-// RUN: "line": 11,\n\
-// RUN: "column": 1\n\
-// RUN: },\n\
-// RUN: "to": {\n\
-// RUN: "line": 11,\n\
-// RUN: "column": 12\n\
-// RUN: }\n\
-// RUN: },\n\
-// RUN: {\n\
-// RUN: "from": {\n\
-// RUN: "line": 13,\n\
-// RUN: "column": 1\n\
-// RUN: },\n\
-// RUN: "to": {\n\
-// RUN: "line": 15,\n\
-// RUN: "column": 1\n\
-// RUN: }\n\
-// RUN: }\n\
-// RUN: ]\n\
-// RUN: }\n\
-// RUN: ]\n\
-// RUN:}' > %t/expected_decls
-// RUN: diff %t/decls %t/expected_decls
//--- foo.cppmap
module foo {
>From 14bd903b35833b6098ecde44d3b7f01ebc0dd816 Mon Sep 17 00:00:00 2001
From: Viktoriia Bakalova <bakalova at google.com>
Date: Tue, 8 Apr 2025 10:49:31 +0200
Subject: [PATCH 04/10] Handle source ranges where `isTokenRange() == true`
(the end of this range specifies the start of the last token). In this case,
compute the source location just past the end of the token at this source
location. Fix the test. The end column is not exclusive.
---
clang/lib/Frontend/FrontendAction.cpp | 8 ++++++--
.../Frontend/dump-deserialized-declaration-ranges.cpp | 7 ++++---
2 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 1f939f7722d19..4066d41cc4e6d 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -130,8 +130,12 @@ class DeserializedDeclsLineRangePrinter : public ASTConsumer,
if (!R.isValid())
continue;
+ SourceLocation End = R.getEnd();
+ if (R.isTokenRange())
+ End = Lexer::getLocForEndOfToken(End, 0, SM, D->getLangOpts());
+
auto *F = SM.getFileEntryForID(SM.getFileID(R.getBegin()));
- if (F != SM.getFileEntryForID(SM.getFileID(R.getEnd()))) {
+ if (F != SM.getFileEntryForID(SM.getFileID(End))) {
// Such cases are rare and difficult to handle.
continue;
}
@@ -140,7 +144,7 @@ class DeserializedDeclsLineRangePrinter : public ASTConsumer,
if (!Data.Ref)
Data.Ref = SM.getFileEntryRefForID(SM.getFileID(R.getBegin()));
Data.FromTo.push_back({Position::GetSpelling(SM, R.getBegin()),
- Position::GetSpelling(SM, R.getEnd())});
+ Position::GetSpelling(SM, End)});
}
// To simplify output, merge consecutive and intersecting ranges.
diff --git a/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
index 8fbbef49388a1..2a54bcea32d05 100644
--- a/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
+++ b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
@@ -3,6 +3,7 @@
// RUN: split-file %s %t
// RUN: %clang_cc1 -xc++ -fmodules -fmodule-name=foo -fmodule-map-file=%t/foo.cppmap -emit-module %t/foo.cppmap -o %t/foo.pcm
// RUN: %clang_cc1 -xc++ -fmodules -dump-deserialized-declaration-ranges=%t/decls -fmodule-file=%t/foo.pcm %t/foo.cpp -o %t/foo.o
+// RUN: cat %t/decls
// RUN: cat %t/decls | FileCheck -check-prefix=RANGE %s
// RANGE:{
// RANGE-NEXT: "required_ranges": [
@@ -16,7 +17,7 @@
// RANGE-NEXT: },
// RANGE-NEXT: "to": {
// RANGE-NEXT: "line": 9,
-// RANGE-NEXT: "column": 1
+// RANGE-NEXT: "column": 2
// RANGE-NEXT: }
// RANGE-NEXT: },
// RANGE-NEXT: {
@@ -26,7 +27,7 @@
// RANGE-NEXT: },
// RANGE-NEXT: "to": {
// RANGE-NEXT: "line": 11,
-// RANGE-NEXT: "column": 12
+// RANGE-NEXT: "column": 24
// RANGE-NEXT: }
// RANGE-NEXT: },
// RANGE-NEXT: {
@@ -36,7 +37,7 @@
// RANGE-NEXT: },
// RANGE-NEXT: "to": {
// RANGE-NEXT: "line": 15,
-// RANGE-NEXT: "column": 1
+// RANGE-NEXT: "column": 2
// RANGE-NEXT: }
// RANGE-NEXT: }
// RANGE-NEXT: ]
>From c71f0bd053a360fb3f3953392d8c010949c85aaa Mon Sep 17 00:00:00 2001
From: Viktoriia Bakalova <bakalova at google.com>
Date: Tue, 8 Apr 2025 11:51:57 +0200
Subject: [PATCH 05/10] Relex the token past the end location of the last token
in the source range. If it's a semicolon, advance the location by one token.
---
clang/lib/Frontend/FrontendAction.cpp | 43 ++++++++++++++-----
.../dump-deserialized-declaration-ranges.cpp | 4 +-
2 files changed, 35 insertions(+), 12 deletions(-)
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 4066d41cc4e6d..ffed6febdaaba 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -18,6 +18,7 @@
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Stack.h"
+#include "clang/Basic/TokenKinds.h"
#include "clang/Frontend/ASTUnit.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendDiagnostic.h"
@@ -105,9 +106,34 @@ class DeserializedDeclsLineRangePrinter : public ASTConsumer,
return Column < other.Column;
}
- static Position GetSpelling(const SourceManager &SM,
- const SourceLocation &SL) {
- return {SM.getSpellingLineNumber(SL), SM.getSpellingColumnNumber(SL)};
+ static Position GetBeginSpelling(const SourceManager &SM,
+ const CharSourceRange &R) {
+ SourceLocation Begin = R.getBegin();
+ return {SM.getSpellingLineNumber(Begin),
+ SM.getSpellingColumnNumber(Begin)};
+ }
+
+ static Position GetEndSpelling(const SourceManager &SM,
+ const CharSourceRange &R,
+ const LangOptions &LangOpts) {
+ SourceLocation End = R.getEnd();
+ if (R.isTokenRange()) {
+ // Compute end location for end character of the range.
+ // The returned location is exclusive.
+ End = Lexer::getLocForEndOfToken(End, 0, SM, LangOpts);
+ } else {
+ // If end already points at the last character in the range, advance one
+ // location, so that end location is exclusive.
+ End = End.getLocWithOffset(1);
+ }
+ // Relex the token past the end location of the last token in the source
+ // range. If it's a semicolon, advance the location by one token.
+ Token PossiblySemi;
+ Lexer::getRawToken(End, PossiblySemi, SM, LangOpts, true);
+ if (PossiblySemi.is(tok::semi))
+ End = Lexer::getLocForEndOfToken(PossiblySemi.getLocation(), 0, SM,
+ LangOpts);
+ return {SM.getSpellingLineNumber(End), SM.getSpellingColumnNumber(End)};
}
};
@@ -130,12 +156,8 @@ class DeserializedDeclsLineRangePrinter : public ASTConsumer,
if (!R.isValid())
continue;
- SourceLocation End = R.getEnd();
- if (R.isTokenRange())
- End = Lexer::getLocForEndOfToken(End, 0, SM, D->getLangOpts());
-
auto *F = SM.getFileEntryForID(SM.getFileID(R.getBegin()));
- if (F != SM.getFileEntryForID(SM.getFileID(End))) {
+ if (F != SM.getFileEntryForID(SM.getFileID(R.getEnd()))) {
// Such cases are rare and difficult to handle.
continue;
}
@@ -143,8 +165,9 @@ class DeserializedDeclsLineRangePrinter : public ASTConsumer,
auto &Data = FileToLines[F];
if (!Data.Ref)
Data.Ref = SM.getFileEntryRefForID(SM.getFileID(R.getBegin()));
- Data.FromTo.push_back({Position::GetSpelling(SM, R.getBegin()),
- Position::GetSpelling(SM, End)});
+ Data.FromTo.push_back(
+ {Position::GetBeginSpelling(SM, R),
+ Position::GetEndSpelling(SM, R, D->getLangOpts())});
}
// To simplify output, merge consecutive and intersecting ranges.
diff --git a/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
index 2a54bcea32d05..433dabe5eb84e 100644
--- a/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
+++ b/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
@@ -17,7 +17,7 @@
// RANGE-NEXT: },
// RANGE-NEXT: "to": {
// RANGE-NEXT: "line": 9,
-// RANGE-NEXT: "column": 2
+// RANGE-NEXT: "column": 3
// RANGE-NEXT: }
// RANGE-NEXT: },
// RANGE-NEXT: {
@@ -27,7 +27,7 @@
// RANGE-NEXT: },
// RANGE-NEXT: "to": {
// RANGE-NEXT: "line": 11,
-// RANGE-NEXT: "column": 24
+// RANGE-NEXT: "column": 25
// RANGE-NEXT: }
// RANGE-NEXT: },
// RANGE-NEXT: {
>From b2a908f877a8cc0cd6ce16b0ac67b9dcd7b2ab11 Mon Sep 17 00:00:00 2001
From: Viktoriia Bakalova <bakalova at google.com>
Date: Tue, 8 Apr 2025 11:56:24 +0200
Subject: [PATCH 06/10] Fix formatting.
---
clang/lib/Frontend/FrontendAction.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index ffed6febdaaba..0d5a40891654a 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -112,7 +112,7 @@ class DeserializedDeclsLineRangePrinter : public ASTConsumer,
return {SM.getSpellingLineNumber(Begin),
SM.getSpellingColumnNumber(Begin)};
}
-
+
static Position GetEndSpelling(const SourceManager &SM,
const CharSourceRange &R,
const LangOptions &LangOpts) {
>From 72c51cc12ab322c7be59723371326e9b140d73ff Mon Sep 17 00:00:00 2001
From: Viktoriia Bakalova <bakalova at google.com>
Date: Tue, 8 Apr 2025 14:18:28 +0200
Subject: [PATCH 07/10] Simplify the code by using `Lexer::getAsCharRange` to
advance to the end location of the end token in the range.
---
clang/lib/Frontend/FrontendAction.cpp | 18 ++++++------------
1 file changed, 6 insertions(+), 12 deletions(-)
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 0d5a40891654a..345128700289b 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -13,6 +13,7 @@
#include "clang/Basic/Builtins.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileEntry.h"
+#include "clang/Basic/LangOptions.h"
#include "clang/Basic/LangStandard.h"
#include "clang/Basic/Sarif.h"
#include "clang/Basic/SourceLocation.h"
@@ -114,25 +115,18 @@ class DeserializedDeclsLineRangePrinter : public ASTConsumer,
}
static Position GetEndSpelling(const SourceManager &SM,
- const CharSourceRange &R,
+ const CharSourceRange &Range,
const LangOptions &LangOpts) {
+ // For token ranges, compute end location for end character of the range.
+ // The end location of returned range is exclusive.
+ CharSourceRange R = Lexer::getAsCharRange(Range, SM, LangOpts);
SourceLocation End = R.getEnd();
- if (R.isTokenRange()) {
- // Compute end location for end character of the range.
- // The returned location is exclusive.
- End = Lexer::getLocForEndOfToken(End, 0, SM, LangOpts);
- } else {
- // If end already points at the last character in the range, advance one
- // location, so that end location is exclusive.
- End = End.getLocWithOffset(1);
- }
// Relex the token past the end location of the last token in the source
// range. If it's a semicolon, advance the location by one token.
Token PossiblySemi;
Lexer::getRawToken(End, PossiblySemi, SM, LangOpts, true);
if (PossiblySemi.is(tok::semi))
- End = Lexer::getLocForEndOfToken(PossiblySemi.getLocation(), 0, SM,
- LangOpts);
+ End = End.getLocWithOffset(1);
return {SM.getSpellingLineNumber(End), SM.getSpellingColumnNumber(End)};
}
};
>From 1fae62f05671ecac7422b3bb8b54f589bce1ba0b Mon Sep 17 00:00:00 2001
From: Viktoriia Bakalova <bakalova at google.com>
Date: Tue, 8 Apr 2025 15:00:21 +0200
Subject: [PATCH 08/10] Rename the flag to `dump-minimization-hints` to make it
more specific.
---
clang/include/clang/Driver/Options.td | 8 ++++----
clang/include/clang/Frontend/FrontendOptions.h | 5 +++--
clang/lib/Frontend/FrontendAction.cpp | 2 +-
3 files changed, 8 insertions(+), 7 deletions(-)
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 1737e40b776e1..fdcf758e263cd 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -7968,10 +7968,10 @@ def print_dependency_directives_minimized_source : Flag<["-"],
"print-dependency-directives-minimized-source">,
HelpText<"Print the output of the dependency directives source minimizer">;
}
-def dump_deserialized_declaration_ranges : Joined<["-"],
- "dump-deserialized-declaration-ranges=">,
- HelpText<"Dump ranges of deserialized declarations to aid debugging and minimization">,
- MarshallingInfoString<FrontendOpts<"DumpDeserializedDeclarationRangesPath">>;
+def dump_minimization_hints : Joined<["-"],
+ "dump-minimization-hints=">,
+ HelpText<"Dump ranges of deserialized declarations to use as bug minimization hints">,
+ MarshallingInfoString<FrontendOpts<"DumpMinimizationHintsPath">>;
defm emit_llvm_uselists : BoolOption<"", "emit-llvm-uselists",
CodeGenOpts<"EmitLLVMUseLists">, DefaultFalse,
diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h
index 8ef9ce9db8783..c919a53ae089e 100644
--- a/clang/include/clang/Frontend/FrontendOptions.h
+++ b/clang/include/clang/Frontend/FrontendOptions.h
@@ -530,8 +530,9 @@ class FrontendOptions {
/// Output Path for module output file.
std::string ModuleOutputPath;
- /// Output path to dump ranges of deserialized declarations.
- std::string DumpDeserializedDeclarationRangesPath;
+ /// Output path to dump ranges of deserialized declarations to use as
+ /// minimization hints.
+ std::string DumpMinimizationHintsPath;
public:
FrontendOptions()
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 345128700289b..638ba6adbde01 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -307,7 +307,7 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI,
std::vector<std::unique_ptr<ASTConsumer>> Consumers;
llvm::StringRef DumpDeserializedDeclarationRangesPath =
- CI.getFrontendOpts().DumpDeserializedDeclarationRangesPath;
+ CI.getFrontendOpts().DumpMinimizationHintsPath;
if (!DumpDeserializedDeclarationRangesPath.empty()) {
std::error_code ErrorCode;
auto FileStream = std::make_unique<llvm::raw_fd_ostream>(
>From 67197439da14478112c4b7d89e4e3a8de56fd681 Mon Sep 17 00:00:00 2001
From: Viktoriia Bakalova <bakalova at google.com>
Date: Tue, 8 Apr 2025 15:17:15 +0200
Subject: [PATCH 09/10] Rename and fix the test.
---
...lized-declaration-ranges.cpp => dump-minimization-hints.cpp} | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
rename clang/test/Frontend/{dump-deserialized-declaration-ranges.cpp => dump-minimization-hints.cpp} (93%)
diff --git a/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp b/clang/test/Frontend/dump-minimization-hints.cpp
similarity index 93%
rename from clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
rename to clang/test/Frontend/dump-minimization-hints.cpp
index 433dabe5eb84e..484378606da2b 100644
--- a/clang/test/Frontend/dump-deserialized-declaration-ranges.cpp
+++ b/clang/test/Frontend/dump-minimization-hints.cpp
@@ -2,7 +2,7 @@
// RUN: mkdir -p %t
// RUN: split-file %s %t
// RUN: %clang_cc1 -xc++ -fmodules -fmodule-name=foo -fmodule-map-file=%t/foo.cppmap -emit-module %t/foo.cppmap -o %t/foo.pcm
-// RUN: %clang_cc1 -xc++ -fmodules -dump-deserialized-declaration-ranges=%t/decls -fmodule-file=%t/foo.pcm %t/foo.cpp -o %t/foo.o
+// RUN: %clang_cc1 -xc++ -fmodules -dump-minimization-hints=%t/decls -fmodule-file=%t/foo.pcm %t/foo.cpp -o %t/foo.o
// RUN: cat %t/decls
// RUN: cat %t/decls | FileCheck -check-prefix=RANGE %s
// RANGE:{
>From 7243489ca88d80f495c5cf448471ffc219fbf188 Mon Sep 17 00:00:00 2001
From: Viktoriia Bakalova <bakalova at google.com>
Date: Wed, 9 Apr 2025 17:52:34 +0200
Subject: [PATCH 10/10] Address review comments.
---
clang/lib/Frontend/FrontendAction.cpp | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 638ba6adbde01..031c59b1a5c2d 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -67,10 +67,10 @@ namespace {
/// compiler frontend have been deserialized. In case our processing causes
/// further deserialization, DeclRead from the listener might be called again.
/// However, at that point we don't accept any more Decls for processing.
-class DeserializedDeclsLineRangePrinter : public ASTConsumer,
- ASTDeserializationListener {
+class DeserializedDeclsSourceRangePrinter : public ASTConsumer,
+ ASTDeserializationListener {
public:
- explicit DeserializedDeclsLineRangePrinter(
+ explicit DeserializedDeclsSourceRangePrinter(
SourceManager &SM, std::unique_ptr<llvm::raw_fd_ostream> OS)
: ASTDeserializationListener(), SM(SM), OS(std::move(OS)) {}
@@ -144,7 +144,7 @@ class DeserializedDeclsLineRangePrinter : public ASTConsumer,
std::vector<std::pair<Position, Position>> FromTo;
OptionalFileEntryRef Ref;
};
- llvm::DenseMap<const FileEntry *, FileData> FileToLines;
+ llvm::DenseMap<const FileEntry *, FileData> FileToRanges;
for (const Decl *D : PendingDecls) {
CharSourceRange R = SM.getExpansionRange(D->getSourceRange());
if (!R.isValid())
@@ -156,7 +156,7 @@ class DeserializedDeclsLineRangePrinter : public ASTConsumer,
continue;
}
- auto &Data = FileToLines[F];
+ auto &Data = FileToRanges[F];
if (!Data.Ref)
Data.Ref = SM.getFileEntryRefForID(SM.getFileID(R.getBegin()));
Data.FromTo.push_back(
@@ -166,7 +166,7 @@ class DeserializedDeclsLineRangePrinter : public ASTConsumer,
// To simplify output, merge consecutive and intersecting ranges.
std::vector<RequiredRanges> Result;
- for (auto &[F, Data] : FileToLines) {
+ for (auto &[F, Data] : FileToRanges) {
auto &FromTo = Data.FromTo;
assert(!FromTo.empty());
@@ -314,11 +314,11 @@ FrontendAction::CreateWrappedASTConsumer(CompilerInstance &CI,
DumpDeserializedDeclarationRangesPath, ErrorCode,
llvm::sys::fs::OF_None);
if (!ErrorCode) {
- Consumers.push_back(std::make_unique<DeserializedDeclsLineRangePrinter>(
+ Consumers.push_back(std::make_unique<DeserializedDeclsSourceRangePrinter>(
CI.getSourceManager(), std::move(FileStream)));
} else {
llvm::errs() << "Failed to create output file for "
- "-dump-deserialized-declaration-ranges flag, file path: "
+ "-dump-minimization-hints flag, file path: "
<< DumpDeserializedDeclarationRangesPath
<< ", error: " << ErrorCode.message() << "\n";
}
More information about the cfe-commits
mailing list