[clang] [Clang] Handle C++20 export declarations in -dump-minimization-hints (PR #151666)
Ilya Biryukov via cfe-commits
cfe-commits at lists.llvm.org
Fri Aug 1 01:12:15 PDT 2025
https://github.com/ilya-biryukov updated https://github.com/llvm/llvm-project/pull/151666
>From ac646c60e0b323eeab7dfa6e1b8965fa7fa78796 Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov at google.com>
Date: Thu, 31 Jul 2025 16:35:50 +0200
Subject: [PATCH 1/2] [Clang] Dump minimization hints for namespaces
Unlike other declarations, these cover two ranges:
- from `namespace/inline namespace` to the opening `{`,
- the closing `}`.
This allows to mark the declarations inside the namespace itself
independently.
---
clang/lib/Frontend/FrontendAction.cpp | 86 +++++++++++++++----
.../test/Frontend/dump-minimization-hints.cpp | 49 ++++++++++-
2 files changed, 118 insertions(+), 17 deletions(-)
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 1d82fc775b28a..3aa5eecfbc8e5 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -9,6 +9,7 @@
#include "clang/Frontend/FrontendAction.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
+#include "clang/AST/Decl.h"
#include "clang/AST/DeclGroup.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/DiagnosticOptions.h"
@@ -38,6 +39,7 @@
#include "clang/Serialization/ASTDeserializationListener.h"
#include "clang/Serialization/ASTReader.h"
#include "clang/Serialization/GlobalModuleIndex.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/BuryPointer.h"
@@ -87,12 +89,25 @@ class DeserializedDeclsSourceRangePrinter : public ASTConsumer,
// reducing the granularity and making the output less useful.
return;
}
- if (auto *DC = D->getDeclContext(); !DC || !DC->isFileContext()) {
+ auto *DC = D->getLexicalDeclContext();
+ if (!DC || !DC->isFileContext()) {
// We choose to work at namespace level to reduce complexity and the
// number of cases we care about.
return;
}
+
PendingDecls.push_back(D);
+ if (auto *NS = dyn_cast<NamespaceDecl>(DC)) {
+ // Add any namespaces we have not seen before.
+ // Note that we filter out namespaces from DeclRead as it includes too
+ // all redeclarations and we only want the ones that had other used
+ // declarations.
+ while (NS && ProcessedNamespaces.insert(NS).second) {
+ PendingDecls.push_back(NS);
+
+ NS = dyn_cast<NamespaceDecl>(NS->getLexicalParent());
+ }
+ }
}
struct Position {
@@ -141,23 +156,25 @@ class DeserializedDeclsSourceRangePrinter : public ASTConsumer,
OptionalFileEntryRef Ref;
};
llvm::DenseMap<const FileEntry *, FileData> FileToRanges;
+
for (const Decl *D : PendingDecls) {
- CharSourceRange R = SM.getExpansionRange(D->getSourceRange());
- if (!R.isValid())
- continue;
+ for (CharSourceRange R : getRangesToMark(D)) {
+ if (!R.isValid())
+ continue;
- auto *F = SM.getFileEntryForID(SM.getFileID(R.getBegin()));
- if (F != SM.getFileEntryForID(SM.getFileID(R.getEnd()))) {
- // Such cases are rare and difficult to handle.
- continue;
- }
+ auto *F = SM.getFileEntryForID(SM.getFileID(R.getBegin()));
+ if (F != SM.getFileEntryForID(SM.getFileID(R.getEnd()))) {
+ // Such cases are rare and difficult to handle.
+ continue;
+ }
- auto &Data = FileToRanges[F];
- if (!Data.Ref)
- Data.Ref = SM.getFileEntryRefForID(SM.getFileID(R.getBegin()));
- Data.FromTo.push_back(
- {Position::GetBeginSpelling(SM, R),
- Position::GetEndSpelling(SM, R, D->getLangOpts())});
+ auto &Data = FileToRanges[F];
+ if (!Data.Ref)
+ Data.Ref = SM.getFileEntryRefForID(SM.getFileID(R.getBegin()));
+ Data.FromTo.push_back(
+ {Position::GetBeginSpelling(SM, R),
+ Position::GetEndSpelling(SM, R, D->getLangOpts())});
+ }
}
// To simplify output, merge consecutive and intersecting ranges.
@@ -188,10 +205,49 @@ class DeserializedDeclsSourceRangePrinter : public ASTConsumer,
private:
std::vector<const Decl *> PendingDecls;
+ llvm::DenseSet<const NamespaceDecl *> ProcessedNamespaces;
bool IsCollectingDecls = true;
const SourceManager &SM;
std::unique_ptr<llvm::raw_ostream> OS;
+ llvm::SmallVector<CharSourceRange, 2> getRangesToMark(const Decl *D) {
+ auto *NS = dyn_cast<NamespaceDecl>(D);
+ if (!NS)
+ return {SM.getExpansionRange(D->getSourceRange())};
+
+ SourceLocation LBraceLoc;
+ if (NS->isAnonymousNamespace()) {
+ LBraceLoc = NS->getLocation();
+ } else {
+ // Start with the location of the identifier.
+ SourceLocation TokenBeforeLBrace = NS->getLocation();
+ if (NS->hasAttrs()) {
+ for (auto *A : NS->getAttrs()) {
+ // But attributes may go after it.
+ if (SM.isBeforeInTranslationUnit(TokenBeforeLBrace,
+ A->getRange().getEnd())) {
+ // Give up, the attributes are often coming from macros and we
+ // cannot skip them reliably.
+ return {};
+ }
+ }
+ }
+ auto &LangOpts = D->getLangOpts();
+ // Now skip one token, the next should be the lbrace.
+ Token Tok;
+ if (Lexer::getRawToken(TokenBeforeLBrace, Tok, SM, LangOpts, true) ||
+ Lexer::getRawToken(Tok.getEndLoc(), Tok, SM, LangOpts, true) ||
+ Tok.getKind() != tok::l_brace) {
+ // On error or if we did not find the token we expected, avoid marking
+ // everything inside the namespace as used.
+ return {};
+ }
+ LBraceLoc = Tok.getLocation();
+ }
+ return {SM.getExpansionRange(SourceRange(NS->getBeginLoc(), LBraceLoc)),
+ SM.getExpansionRange(NS->getRBraceLoc())};
+ }
+
void printJson(llvm::ArrayRef<RequiredRanges> Result) {
*OS << "{\n";
*OS << R"( "required_ranges": [)" << "\n";
diff --git a/clang/test/Frontend/dump-minimization-hints.cpp b/clang/test/Frontend/dump-minimization-hints.cpp
index 273fd7f4ecd63..4c5dfbc343cd4 100644
--- a/clang/test/Frontend/dump-minimization-hints.cpp
+++ b/clang/test/Frontend/dump-minimization-hints.cpp
@@ -59,6 +59,36 @@
// RANGE-NEXT: "line": 23,
// RANGE-NEXT: "column": 2
// RANGE-NEXT: }
+// RANGE-NEXT: },
+// RANGE-NEXT: {
+// RANGE-NEXT: "from": {
+// RANGE-NEXT: "line": 31,
+// RANGE-NEXT: "column": 1
+// RANGE-NEXT: },
+// RANGE-NEXT: "to": {
+// RANGE-NEXT: "line": 31,
+// RANGE-NEXT: "column": 27
+// RANGE-NEXT: }
+// RANGE-NEXT: },
+// RANGE-NEXT: {
+// RANGE-NEXT: "from": {
+// RANGE-NEXT: "line": 32,
+// RANGE-NEXT: "column": 3
+// RANGE-NEXT: },
+// RANGE-NEXT: "to": {
+// RANGE-NEXT: "line": 32,
+// RANGE-NEXT: "column": 12
+// RANGE-NEXT: }
+// RANGE-NEXT: },
+// RANGE-NEXT: {
+// RANGE-NEXT: "from": {
+// RANGE-NEXT: "line": 34,
+// RANGE-NEXT: "column": 1
+// RANGE-NEXT: },
+// RANGE-NEXT: "to": {
+// RANGE-NEXT: "line": 34,
+// RANGE-NEXT: "column": 2
+// RANGE-NEXT: }
// RANGE-NEXT: }
// RANGE-NEXT: ]
// RANGE-NEXT: }
@@ -88,7 +118,7 @@ int multiply(int a, int b) {
return a * b;
}
-inline int unused_by_foo() {} // line 17
+inline void unused_by_foo() {} // line 17
inline void recursively_used_by_foo() {} // line 19
inline int used_by_foo() { // line 20
@@ -98,6 +128,20 @@ inline int used_by_foo() { // line 20
struct UnusedByFoo {};
+namespace ns_unused_by_foo {
+ void x();
+}
+
+namespace ns_used_by_foo { // line 31
+ void x(); // line 32
+ void unused_y();
+} // line 34
+
+// Does not have any declarations that are used, so
+// will not be marked as used.
+namespace ns_used_by_foo {
+ void unused_z();
+}
//--- foo.cpp
#include "foo.h"
int global_value = 5;
@@ -107,5 +151,6 @@ int main() {
int doubled_value = multiply(current_value, 2);
int final_result = doubled_value + global_value;
- return used_by_foo();
+ used_by_foo();
+ ns_used_by_foo::x();
}
>From 8193cff8fac6ae12d261f006d32896f119e111ab Mon Sep 17 00:00:00 2001
From: Ilya Biryukov <ibiryukov at google.com>
Date: Fri, 1 Aug 2025 09:14:46 +0200
Subject: [PATCH 2/2] [Clang] Handle C++20 export declarations in
-dump-minimization-hints
Thanks to Justin Stitt for bringing this up, providing test cases and a
direction for the fix!
---
clang/lib/Frontend/FrontendAction.cpp | 82 ++++++++----
.../dump-minimization-hints-cpp20-modules.cpp | 117 ++++++++++++++++++
2 files changed, 172 insertions(+), 27 deletions(-)
create mode 100644 clang/test/Frontend/dump-minimization-hints-cpp20-modules.cpp
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 3aa5eecfbc8e5..3a06633426c08 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -84,29 +84,28 @@ class DeserializedDeclsSourceRangePrinter : public ASTConsumer,
if (!IsCollectingDecls)
return;
if (!D || isa<TranslationUnitDecl>(D) || isa<LinkageSpecDecl>(D) ||
- isa<NamespaceDecl>(D)) {
+ isa<NamespaceDecl>(D) || isa<ExportDecl>(D)) {
// These decls cover a lot of nested declarations that might not be used,
// reducing the granularity and making the output less useful.
return;
}
- auto *DC = D->getLexicalDeclContext();
- if (!DC || !DC->isFileContext()) {
- // We choose to work at namespace level to reduce complexity and the
- // number of cases we care about.
+ if (isa<ParmVarDecl>(D)) {
+ // Parameters are covered by their functions.
return;
}
+ auto *DC = D->getLexicalDeclContext();
+ if (!DC || !shouldIncludeDeclsIn(DC))
+ return;
PendingDecls.push_back(D);
- if (auto *NS = dyn_cast<NamespaceDecl>(DC)) {
- // Add any namespaces we have not seen before.
- // Note that we filter out namespaces from DeclRead as it includes too
- // all redeclarations and we only want the ones that had other used
- // declarations.
- while (NS && ProcessedNamespaces.insert(NS).second) {
- PendingDecls.push_back(NS);
-
- NS = dyn_cast<NamespaceDecl>(NS->getLexicalParent());
- }
+ for (; (isa<ExportDecl>(DC) || isa<NamespaceDecl>(DC)) &&
+ ProcessedDeclContexts.insert(DC).second;
+ DC = DC->getLexicalParent()) {
+ // Add any interesting decl contexts that we have not seen before.
+ // Note that we filter them out from DeclRead as that would include all
+ // redeclarations of namespaces, potentially those that do not have any
+ // imported declarations.
+ PendingDecls.push_back(cast<Decl>(DC));
}
}
@@ -205,12 +204,37 @@ class DeserializedDeclsSourceRangePrinter : public ASTConsumer,
private:
std::vector<const Decl *> PendingDecls;
- llvm::DenseSet<const NamespaceDecl *> ProcessedNamespaces;
+ llvm::DenseSet<const DeclContext *> ProcessedDeclContexts;
bool IsCollectingDecls = true;
const SourceManager &SM;
std::unique_ptr<llvm::raw_ostream> OS;
+ static bool shouldIncludeDeclsIn(const DeclContext *DC) {
+ assert(DC && "DC is null");
+ // We choose to work at namespace level to reduce complexity and the number
+ // of cases we care about.
+ // We still need to carefully handle composite declarations like
+ // `ExportDecl`.
+ for (; DC; DC = DC->getLexicalParent()) {
+ if (DC->isFileContext())
+ return true;
+ if (isa<ExportDecl>(DC))
+ continue; // Depends on the parent.
+ return false;
+ }
+ llvm_unreachable("DeclConext chain must end with a translation unit");
+ }
llvm::SmallVector<CharSourceRange, 2> getRangesToMark(const Decl *D) {
+ if (auto *ED = dyn_cast<ExportDecl>(D)) {
+ if (!ED->hasBraces())
+ return {SM.getExpansionRange(ED->getExportLoc())};
+
+ return {SM.getExpansionRange(SourceRange(
+ ED->getExportLoc(),
+ lexForLBrace(ED->getExportLoc(), D->getLangOpts()))),
+ SM.getExpansionRange(ED->getRBraceLoc())};
+ }
+
auto *NS = dyn_cast<NamespaceDecl>(D);
if (!NS)
return {SM.getExpansionRange(D->getSourceRange())};
@@ -232,17 +256,7 @@ class DeserializedDeclsSourceRangePrinter : public ASTConsumer,
}
}
}
- auto &LangOpts = D->getLangOpts();
- // Now skip one token, the next should be the lbrace.
- Token Tok;
- if (Lexer::getRawToken(TokenBeforeLBrace, Tok, SM, LangOpts, true) ||
- Lexer::getRawToken(Tok.getEndLoc(), Tok, SM, LangOpts, true) ||
- Tok.getKind() != tok::l_brace) {
- // On error or if we did not find the token we expected, avoid marking
- // everything inside the namespace as used.
- return {};
- }
- LBraceLoc = Tok.getLocation();
+ LBraceLoc = lexForLBrace(TokenBeforeLBrace, D->getLangOpts());
}
return {SM.getExpansionRange(SourceRange(NS->getBeginLoc(), LBraceLoc)),
SM.getExpansionRange(NS->getRBraceLoc())};
@@ -283,6 +297,20 @@ class DeserializedDeclsSourceRangePrinter : public ASTConsumer,
*OS << " ]\n";
*OS << "}\n";
}
+
+ SourceLocation lexForLBrace(SourceLocation TokenBeforeLBrace,
+ const LangOptions &LangOpts) {
+ // Now skip one token, the next should be the lbrace.
+ Token Tok;
+ if (Lexer::getRawToken(TokenBeforeLBrace, Tok, SM, LangOpts, true) ||
+ Lexer::getRawToken(Tok.getEndLoc(), Tok, SM, LangOpts, true) ||
+ Tok.getKind() != tok::l_brace) {
+ // On error or if we did not find the token we expected, avoid marking
+ // everything inside the namespace as used.
+ return SourceLocation();
+ }
+ return Tok.getLocation();
+ }
};
/// Dumps deserialized declarations.
diff --git a/clang/test/Frontend/dump-minimization-hints-cpp20-modules.cpp b/clang/test/Frontend/dump-minimization-hints-cpp20-modules.cpp
new file mode 100644
index 0000000000000..978af4cc7c0ad
--- /dev/null
+++ b/clang/test/Frontend/dump-minimization-hints-cpp20-modules.cpp
@@ -0,0 +1,117 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+// RUN: %clang_cc1 -std=c++20 %t/foo.cppm -emit-module-interface -o %t/foo.pcm
+// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/use.cpp -dump-minimization-hints=%t/decls
+// RUN: cat %t/decls
+// RUN: cat %t/decls | FileCheck -check-prefix=RANGE %s
+// RANGE:{
+// RANGE-NEXT:"required_ranges": [
+// RANGE-NEXT: {
+// RANGE-NEXT: "file": "/usr/local/google/home/ibiryukov/code/llvm-project/build/tools/clang/test/Frontend/Output/dump-minimization-hints-cpp20-modules.cpp.tmp/foo.cppm",
+// RANGE-NEXT: "range": [
+// RANGE-NEXT: {
+// RANGE-NEXT: "from": {
+// RANGE-NEXT: "line": 3,
+// RANGE-NEXT: "column": 1
+// RANGE-NEXT: },
+// RANGE-NEXT: "to": {
+// RANGE-NEXT: "line": 3,
+// RANGE-NEXT: "column": 22
+// RANGE-NEXT: }
+// RANGE-NEXT: },
+// RANGE-NEXT: {
+// RANGE-NEXT: "from": {
+// RANGE-NEXT: "line": 4,
+// RANGE-NEXT: "column": 3
+// RANGE-NEXT: },
+// RANGE-NEXT: "to": {
+// RANGE-NEXT: "line": 4,
+// RANGE-NEXT: "column": 9
+// RANGE-NEXT: }
+// RANGE-NEXT: },
+// RANGE-NEXT: {
+// RANGE-NEXT: "from": {
+// RANGE-NEXT: "line": 4,
+// RANGE-NEXT: "column": 10
+// RANGE-NEXT: },
+// RANGE-NEXT: "to": {
+// RANGE-NEXT: "line": 4,
+// RANGE-NEXT: "column": 43
+// RANGE-NEXT: }
+// RANGE-NEXT: },
+// RANGE-NEXT: {
+// RANGE-NEXT: "from": {
+// RANGE-NEXT: "line": 6,
+// RANGE-NEXT: "column": 1
+// RANGE-NEXT: },
+// RANGE-NEXT: "to": {
+// RANGE-NEXT: "line": 6,
+// RANGE-NEXT: "column": 2
+// RANGE-NEXT: }
+// RANGE-NEXT: },
+// RANGE-NEXT: {
+// RANGE-NEXT: "from": {
+// RANGE-NEXT: "line": 8,
+// RANGE-NEXT: "column": 1
+// RANGE-NEXT: },
+// RANGE-NEXT: "to": {
+// RANGE-NEXT: "line": 8,
+// RANGE-NEXT: "column": 7
+// RANGE-NEXT: }
+// RANGE-NEXT: },
+// RANGE-NEXT: {
+// RANGE-NEXT: "from": {
+// RANGE-NEXT: "line": 8,
+// RANGE-NEXT: "column": 8
+// RANGE-NEXT: },
+// RANGE-NEXT: "to": {
+// RANGE-NEXT: "line": 8,
+// RANGE-NEXT: "column": 25
+// RANGE-NEXT: }
+// RANGE-NEXT: },
+// RANGE-NEXT: {
+// RANGE-NEXT: "from": {
+// RANGE-NEXT: "line": 9,
+// RANGE-NEXT: "column": 3
+// RANGE-NEXT: },
+// RANGE-NEXT: "to": {
+// RANGE-NEXT: "line": 9,
+// RANGE-NEXT: "column": 36
+// RANGE-NEXT: }
+// RANGE-NEXT: },
+// RANGE-NEXT: {
+// RANGE-NEXT: "from": {
+// RANGE-NEXT: "line": 11,
+// RANGE-NEXT: "column": 1
+// RANGE-NEXT: },
+// RANGE-NEXT: "to": {
+// RANGE-NEXT: "line": 11,
+// RANGE-NEXT: "column": 2
+// RANGE-NEXT: }
+// RANGE-NEXT: }
+// RANGE-NEXT: ]
+// RANGE-NEXT: }
+// RANGE-NEXT:]
+// RANGE-NEXT:}
+
+//--- foo.cppm
+export module foo;
+
+namespace piecemeal { // line 3
+ export int used(int n) { return n + 1; }
+ export int unused(int n) { return n + 2; }
+}
+
+export namespace whole { // line 8
+ int used(int n) { return n + 1; }
+ int unused(int n) { return n + 3; }
+} // line 11
+
+//--- use.cpp
+import foo;
+
+int main() {
+ piecemeal::used(4); // only of the functions used from each namespace.
+ whole::used(4);
+}
More information about the cfe-commits
mailing list