[clang] [Clang] Handle C++20 export declarations in -dump-minimization-hints (PR #151666)

via cfe-commits cfe-commits at lists.llvm.org
Fri Aug 1 01:08:42 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Ilya Biryukov (ilya-biryukov)

<details>
<summary>Changes</summary>

Thanks to Justin Stitt for bringing this up, providing test cases and a
direction for the fix!

---
Full diff: https://github.com/llvm/llvm-project/pull/151666.diff


3 Files Affected:

- (modified) clang/lib/Frontend/FrontendAction.cpp (+102-18) 
- (added) clang/test/Frontend/dump-minimization-hints-cpp20-modules.cpp (+117) 
- (modified) clang/test/Frontend/dump-minimization-hints.cpp (+47-2) 


``````````diff
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 1d82fc775b28a..4ceb78ee81bad 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -9,6 +9,7 @@
 #include "clang/Frontend/FrontendAction.h"
 #include "clang/AST/ASTConsumer.h"
 #include "clang/AST/ASTContext.h"
+#include "clang/AST/Decl.h"
 #include "clang/AST/DeclGroup.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/DiagnosticOptions.h"
@@ -38,6 +39,7 @@
 #include "clang/Serialization/ASTDeserializationListener.h"
 #include "clang/Serialization/ASTReader.h"
 #include "clang/Serialization/GlobalModuleIndex.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/BuryPointer.h"
@@ -82,17 +84,29 @@ class DeserializedDeclsSourceRangePrinter : public ASTConsumer,
     if (!IsCollectingDecls)
       return;
     if (!D || isa<TranslationUnitDecl>(D) || isa<LinkageSpecDecl>(D) ||
-        isa<NamespaceDecl>(D)) {
+        isa<NamespaceDecl>(D) || isa<ExportDecl>(D)) {
       // These decls cover a lot of nested declarations that might not be used,
       // reducing the granularity and making the output less useful.
       return;
     }
-    if (auto *DC = D->getDeclContext(); !DC || !DC->isFileContext()) {
-      // We choose to work at namespace level to reduce complexity and the
-      // number of cases we care about.
+    if (isa<ParmVarDecl>(D)) {
+      // Parameters are covered by their functions.
       return;
     }
+    auto *DC = D->getLexicalDeclContext();
+    if (!DC || !shouldIncludeDeclsIn(DC))
+      return;
+
     PendingDecls.push_back(D);
+    for (; (isa<ExportDecl>(DC) || isa<NamespaceDecl>(DC)) &&
+           ProcessedDeclContexts.insert(DC).second;
+         DC = DC->getLexicalParent()) {
+      // Add any interesting decl contexts that we have not seen before.
+      // Note that we filter them out from DeclRead as that would include all
+      // redeclarations of namespaces, potentially those that do not have any
+      // imported declarations.
+      PendingDecls.push_back(cast<Decl>(DC));
+    }
   }
 
   struct Position {
@@ -141,23 +155,25 @@ class DeserializedDeclsSourceRangePrinter : public ASTConsumer,
       OptionalFileEntryRef Ref;
     };
     llvm::DenseMap<const FileEntry *, FileData> FileToRanges;
+
     for (const Decl *D : PendingDecls) {
-      CharSourceRange R = SM.getExpansionRange(D->getSourceRange());
-      if (!R.isValid())
-        continue;
+      for (CharSourceRange R : getRangesToMark(D)) {
+        if (!R.isValid())
+          continue;
 
-      auto *F = SM.getFileEntryForID(SM.getFileID(R.getBegin()));
-      if (F != SM.getFileEntryForID(SM.getFileID(R.getEnd()))) {
-        // Such cases are rare and difficult to handle.
-        continue;
-      }
+        auto *F = SM.getFileEntryForID(SM.getFileID(R.getBegin()));
+        if (F != SM.getFileEntryForID(SM.getFileID(R.getEnd()))) {
+          // Such cases are rare and difficult to handle.
+          continue;
+        }
 
-      auto &Data = FileToRanges[F];
-      if (!Data.Ref)
-        Data.Ref = SM.getFileEntryRefForID(SM.getFileID(R.getBegin()));
-      Data.FromTo.push_back(
-          {Position::GetBeginSpelling(SM, R),
-           Position::GetEndSpelling(SM, R, D->getLangOpts())});
+        auto &Data = FileToRanges[F];
+        if (!Data.Ref)
+          Data.Ref = SM.getFileEntryRefForID(SM.getFileID(R.getBegin()));
+        Data.FromTo.push_back(
+            {Position::GetBeginSpelling(SM, R),
+             Position::GetEndSpelling(SM, R, D->getLangOpts())});
+      }
     }
 
     // To simplify output, merge consecutive and intersecting ranges.
@@ -188,10 +204,64 @@ class DeserializedDeclsSourceRangePrinter : public ASTConsumer,
 
 private:
   std::vector<const Decl *> PendingDecls;
+  llvm::DenseSet<const DeclContext *> ProcessedDeclContexts;
   bool IsCollectingDecls = true;
   const SourceManager &SM;
   std::unique_ptr<llvm::raw_ostream> OS;
 
+  static bool shouldIncludeDeclsIn(const DeclContext* DC) {
+    assert(DC && "DC is null");
+    // We choose to work at namespace level to reduce complexity and the number
+    // of cases we care about.
+    // We still need to carefully handle composite declarations like
+    // `ExportDecl`.
+    for (; DC; DC = DC->getLexicalParent()) {
+      if (DC->isFileContext())
+        return true;
+      if (isa<ExportDecl>(DC))
+        continue; // Depends on the parent.
+      return false;
+    }
+    llvm_unreachable("DeclConext chain must end with a translation unit");
+  }
+  llvm::SmallVector<CharSourceRange, 2> getRangesToMark(const Decl *D) {
+    if (auto *ED = dyn_cast<ExportDecl>(D)) {
+      if (!ED->hasBraces())
+        return {SM.getExpansionRange(ED->getExportLoc())};
+
+      return {SM.getExpansionRange(SourceRange(
+                  ED->getExportLoc(),
+                  lexForLBrace(ED->getExportLoc(), D->getLangOpts()))),
+              SM.getExpansionRange(ED->getRBraceLoc())};
+    }
+
+    auto *NS = dyn_cast<NamespaceDecl>(D);
+    if (!NS)
+      return {SM.getExpansionRange(D->getSourceRange())};
+
+    SourceLocation LBraceLoc;
+    if (NS->isAnonymousNamespace()) {
+      LBraceLoc = NS->getLocation();
+    } else {
+      // Start with the location of the identifier.
+      SourceLocation TokenBeforeLBrace = NS->getLocation();
+      if (NS->hasAttrs()) {
+        for (auto *A : NS->getAttrs()) {
+          // But attributes may go after it.
+          if (SM.isBeforeInTranslationUnit(TokenBeforeLBrace,
+                                           A->getRange().getEnd())) {
+            // Give up, the attributes are often coming from macros and we
+            // cannot skip them reliably.
+            return {};
+          }
+        }
+      }
+      LBraceLoc = lexForLBrace(TokenBeforeLBrace, D->getLangOpts());
+    }
+    return {SM.getExpansionRange(SourceRange(NS->getBeginLoc(), LBraceLoc)),
+            SM.getExpansionRange(NS->getRBraceLoc())};
+  }
+
   void printJson(llvm::ArrayRef<RequiredRanges> Result) {
     *OS << "{\n";
     *OS << R"(  "required_ranges": [)" << "\n";
@@ -227,6 +297,20 @@ class DeserializedDeclsSourceRangePrinter : public ASTConsumer,
     *OS << "  ]\n";
     *OS << "}\n";
   }
+
+  SourceLocation lexForLBrace(SourceLocation TokenBeforeLBrace,
+                              const LangOptions &LangOpts) {
+    // Now skip one token, the next should be the lbrace.
+    Token Tok;
+    if (Lexer::getRawToken(TokenBeforeLBrace, Tok, SM, LangOpts, true) ||
+        Lexer::getRawToken(Tok.getEndLoc(), Tok, SM, LangOpts, true) ||
+        Tok.getKind() != tok::l_brace) {
+      // On error or if we did not find the token we expected, avoid marking
+      // everything inside the namespace as used.
+      return SourceLocation();
+    }
+    return Tok.getLocation();
+  }
 };
 
 /// Dumps deserialized declarations.
diff --git a/clang/test/Frontend/dump-minimization-hints-cpp20-modules.cpp b/clang/test/Frontend/dump-minimization-hints-cpp20-modules.cpp
new file mode 100644
index 0000000000000..978af4cc7c0ad
--- /dev/null
+++ b/clang/test/Frontend/dump-minimization-hints-cpp20-modules.cpp
@@ -0,0 +1,117 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+// RUN: %clang_cc1 -std=c++20 %t/foo.cppm -emit-module-interface -o %t/foo.pcm
+// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/use.cpp -dump-minimization-hints=%t/decls
+// RUN: cat %t/decls
+// RUN: cat %t/decls | FileCheck -check-prefix=RANGE %s
+// RANGE:{
+// RANGE-NEXT:"required_ranges": [
+// RANGE-NEXT:  {
+// RANGE-NEXT:    "file": "/usr/local/google/home/ibiryukov/code/llvm-project/build/tools/clang/test/Frontend/Output/dump-minimization-hints-cpp20-modules.cpp.tmp/foo.cppm",
+// RANGE-NEXT:    "range": [
+// RANGE-NEXT:      {
+// RANGE-NEXT:        "from": {
+// RANGE-NEXT:          "line": 3,
+// RANGE-NEXT:          "column": 1
+// RANGE-NEXT:        },
+// RANGE-NEXT:        "to": {
+// RANGE-NEXT:          "line": 3,
+// RANGE-NEXT:          "column": 22
+// RANGE-NEXT:        }
+// RANGE-NEXT:      },
+// RANGE-NEXT:      {
+// RANGE-NEXT:        "from": {
+// RANGE-NEXT:          "line": 4,
+// RANGE-NEXT:          "column": 3
+// RANGE-NEXT:        },
+// RANGE-NEXT:        "to": {
+// RANGE-NEXT:          "line": 4,
+// RANGE-NEXT:          "column": 9
+// RANGE-NEXT:        }
+// RANGE-NEXT:      },
+// RANGE-NEXT:      {
+// RANGE-NEXT:        "from": {
+// RANGE-NEXT:          "line": 4,
+// RANGE-NEXT:          "column": 10
+// RANGE-NEXT:        },
+// RANGE-NEXT:        "to": {
+// RANGE-NEXT:          "line": 4,
+// RANGE-NEXT:          "column": 43
+// RANGE-NEXT:        }
+// RANGE-NEXT:      },
+// RANGE-NEXT:      {
+// RANGE-NEXT:        "from": {
+// RANGE-NEXT:          "line": 6,
+// RANGE-NEXT:          "column": 1
+// RANGE-NEXT:        },
+// RANGE-NEXT:        "to": {
+// RANGE-NEXT:          "line": 6,
+// RANGE-NEXT:          "column": 2
+// RANGE-NEXT:        }
+// RANGE-NEXT:      },
+// RANGE-NEXT:      {
+// RANGE-NEXT:        "from": {
+// RANGE-NEXT:          "line": 8,
+// RANGE-NEXT:          "column": 1
+// RANGE-NEXT:        },
+// RANGE-NEXT:        "to": {
+// RANGE-NEXT:          "line": 8,
+// RANGE-NEXT:          "column": 7
+// RANGE-NEXT:        }
+// RANGE-NEXT:      },
+// RANGE-NEXT:      {
+// RANGE-NEXT:        "from": {
+// RANGE-NEXT:          "line": 8,
+// RANGE-NEXT:          "column": 8
+// RANGE-NEXT:        },
+// RANGE-NEXT:        "to": {
+// RANGE-NEXT:          "line": 8,
+// RANGE-NEXT:          "column": 25
+// RANGE-NEXT:        }
+// RANGE-NEXT:      },
+// RANGE-NEXT:      {
+// RANGE-NEXT:        "from": {
+// RANGE-NEXT:          "line": 9,
+// RANGE-NEXT:          "column": 3
+// RANGE-NEXT:        },
+// RANGE-NEXT:        "to": {
+// RANGE-NEXT:          "line": 9,
+// RANGE-NEXT:          "column": 36
+// RANGE-NEXT:        }
+// RANGE-NEXT:      },
+// RANGE-NEXT:      {
+// RANGE-NEXT:        "from": {
+// RANGE-NEXT:          "line": 11,
+// RANGE-NEXT:          "column": 1
+// RANGE-NEXT:        },
+// RANGE-NEXT:        "to": {
+// RANGE-NEXT:          "line": 11,
+// RANGE-NEXT:          "column": 2
+// RANGE-NEXT:        }
+// RANGE-NEXT:      }
+// RANGE-NEXT:    ]
+// RANGE-NEXT:  }
+// RANGE-NEXT:]
+// RANGE-NEXT:}
+
+//--- foo.cppm
+export module foo;
+
+namespace piecemeal { // line 3
+  export int used(int n) { return n + 1; }
+  export int unused(int n) { return n + 2; }
+}
+
+export namespace whole { // line 8
+  int used(int n) { return n + 1; }
+  int unused(int n) { return n + 3; }
+} // line 11
+
+//--- use.cpp
+import foo;
+
+int main() {
+  piecemeal::used(4);  // only of the functions used from each namespace.
+  whole::used(4);
+}
diff --git a/clang/test/Frontend/dump-minimization-hints.cpp b/clang/test/Frontend/dump-minimization-hints.cpp
index 273fd7f4ecd63..4c5dfbc343cd4 100644
--- a/clang/test/Frontend/dump-minimization-hints.cpp
+++ b/clang/test/Frontend/dump-minimization-hints.cpp
@@ -59,6 +59,36 @@
 // RANGE-NEXT:            "line": 23,
 // RANGE-NEXT:            "column": 2
 // RANGE-NEXT:          }
+// RANGE-NEXT:        },
+// RANGE-NEXT:        {
+// RANGE-NEXT:          "from": {
+// RANGE-NEXT:            "line": 31,
+// RANGE-NEXT:            "column": 1
+// RANGE-NEXT:          },
+// RANGE-NEXT:          "to": {
+// RANGE-NEXT:            "line": 31,
+// RANGE-NEXT:            "column": 27
+// RANGE-NEXT:          }
+// RANGE-NEXT:        },
+// RANGE-NEXT:        {
+// RANGE-NEXT:          "from": {
+// RANGE-NEXT:            "line": 32,
+// RANGE-NEXT:            "column": 3
+// RANGE-NEXT:          },
+// RANGE-NEXT:          "to": {
+// RANGE-NEXT:            "line": 32,
+// RANGE-NEXT:            "column": 12
+// RANGE-NEXT:          }
+// RANGE-NEXT:        },
+// RANGE-NEXT:        {
+// RANGE-NEXT:          "from": {
+// RANGE-NEXT:            "line": 34,
+// RANGE-NEXT:            "column": 1
+// RANGE-NEXT:          },
+// RANGE-NEXT:          "to": {
+// RANGE-NEXT:            "line": 34,
+// RANGE-NEXT:            "column": 2
+// RANGE-NEXT:          }
 // RANGE-NEXT:        }
 // RANGE-NEXT:      ]
 // RANGE-NEXT:    }
@@ -88,7 +118,7 @@ int multiply(int a, int b) {
     return a * b;
 }
 
-inline int unused_by_foo() {} // line 17
+inline void unused_by_foo() {} // line 17
 
 inline void recursively_used_by_foo() {} // line 19
 inline int used_by_foo() { // line 20
@@ -98,6 +128,20 @@ inline int used_by_foo() { // line 20
 
 struct UnusedByFoo {};
 
+namespace ns_unused_by_foo {
+  void x();
+}
+
+namespace ns_used_by_foo { // line 31
+  void x(); // line 32
+  void unused_y(); 
+} // line 34
+
+// Does not have any declarations that are used, so 
+// will not be marked as used.
+namespace ns_used_by_foo { 
+  void unused_z();
+}
 //--- foo.cpp
 #include "foo.h"
 int global_value = 5;
@@ -107,5 +151,6 @@ int main() {
   int doubled_value = multiply(current_value, 2);
   int final_result = doubled_value + global_value;
 
-  return used_by_foo();
+  used_by_foo();
+  ns_used_by_foo::x();
 }

``````````

</details>


https://github.com/llvm/llvm-project/pull/151666


More information about the cfe-commits mailing list