[clang] [Serialization] [ASTWriter] Try to not record namespace as much as possible (PR #179178)

Chuanqi Xu via cfe-commits cfe-commits at lists.llvm.org
Thu Mar 12 01:48:26 PDT 2026


https://github.com/ChuanqiXu9 updated https://github.com/llvm/llvm-project/pull/179178

>From ec03e9f3a0ae4dbbc2beda61f1c3e9413ddfbb87 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd at linux.alibaba.com>
Date: Mon, 2 Feb 2026 15:16:43 +0800
Subject: [PATCH] [Serialization] [ASTWriter] Try to not record namespace as
 much as possible

See
https://clang.llvm.org/docs/StandardCPlusPlusModules.html#experimental-non-cascading-changes
for the full background.

In short, we want to cut off the dependencies to improve the
recompilation time.

And namespace is special, as the same namespace can occur in many TUs.

This patch tries to clean up unneeded reference to namespace from other
module file. The touched parts are:

- When write on disk lookup table, don't write the merged table. This
  makes the ownership/dependency much cleaner. For performance, I think
  the intention was to make the cost of mergin table amortized. But in
  our internal workloads, I didn't observe any regression if I turn off
  writing the merged table.
- When writing redeclarations, only write the ID of the first namespace
  and avoid referenece to all other previous namespaces. (I don't want to
  write the first namespace either... but it is more complex).
- When writing the name lookup table, try to write the local namespace.

----

Update:

Close https://github.com/llvm/llvm-project/issues/184957

The roo cause of the problem is reduced BMI may not emit everything in
the lookup table, if Reduced BMI **partially** emits some decls,
then the generator may not emit the corresponding entry for the
corresponding name is already there. See MultiOnDiskHashTableGenerator::insert and
MultiOnDiskHashTableGenerator::emit for details. So we won't emit the lookup
table if we're generating reduced BMI.
---
 clang/include/clang/Serialization/ASTWriter.h |   2 -
 clang/lib/Serialization/ASTWriter.cpp         |  65 ++++--
 clang/lib/Serialization/ASTWriterDecl.cpp     |  17 +-
 .../no-transitive-change-namespace.cppm       |  44 ++++
 .../Modules/no-transitive-decl-change-4.cppm  |   2 +-
 clang/test/Modules/pr184957.cppm              | 216 ++++++++++++++++++
 6 files changed, 324 insertions(+), 22 deletions(-)
 create mode 100644 clang/test/Modules/no-transitive-change-namespace.cppm
 create mode 100644 clang/test/Modules/pr184957.cppm

diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index 0f3993ad01693..88b62c7f4e6b4 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -928,8 +928,6 @@ class ASTWriter : public ASTDeserializationListener,
 
   void handleVTable(CXXRecordDecl *RD);
 
-  void addTouchedModuleFile(serialization::ModuleFile *);
-
 private:
   // ASTDeserializationListener implementation
   void ReaderInitialized(ASTReader *Reader) override;
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index ec718169550aa..71ea057c476e6 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -4085,10 +4085,6 @@ void ASTWriter::handleVTable(CXXRecordDecl *RD) {
   PendingEmittingVTables.push_back(RD);
 }
 
-void ASTWriter::addTouchedModuleFile(serialization::ModuleFile *MF) {
-  TouchedModuleFiles.insert(MF);
-}
-
 //===----------------------------------------------------------------------===//
 // DeclContext's Name Lookup Table Serialization
 //===----------------------------------------------------------------------===//
@@ -4133,7 +4129,6 @@ class ASTDeclContextNameLookupTraitBase {
            "have reference to loaded module file but no chain?");
 
     using namespace llvm::support;
-    Writer.addTouchedModuleFile(F);
     endian::write<uint32_t>(Out, Writer.getChain()->getModuleFileID(F),
                             llvm::endianness::little);
   }
@@ -4331,6 +4326,14 @@ static bool isTULocalInNamedModules(NamedDecl *D) {
   return D->getLinkageInternal() == Linkage::Internal;
 }
 
+static NamedDecl *getLocalRedecl(NamedDecl *D) {
+  for (auto *RD : D->redecls())
+    if (!RD->isFromASTFile())
+      return cast<NamedDecl>(RD);
+
+  return D;
+}
+
 class ASTDeclContextNameLookupTrait
     : public ASTDeclContextNameTrivialLookupTrait {
 public:
@@ -4404,6 +4407,13 @@ class ASTDeclContextNameLookupTrait
       NamedDecl *DeclForLocalLookup =
           getDeclForLocalLookup(Writer.getLangOpts(), D);
 
+      // Namespace may have many redeclarations in many TU.
+      // Also, these namespaces are symmetric. So that we try to use
+      // the local redecl if any.
+      if (isa<NamespaceDecl>(DeclForLocalLookup) &&
+          DeclForLocalLookup->isFromASTFile())
+        DeclForLocalLookup = getLocalRedecl(DeclForLocalLookup);
+
       if (Writer.getDoneWritingDeclsAndTypes() &&
           !Writer.wasDeclEmitted(DeclForLocalLookup))
         return;
@@ -4525,7 +4535,6 @@ class LazySpecializationInfoLookupTrait {
            "have reference to loaded module file but no chain?");
 
     using namespace llvm::support;
-    Writer.addTouchedModuleFile(F);
     endian::write<uint32_t>(Out, Writer.getChain()->getModuleFileID(F),
                             llvm::endianness::little);
   }
@@ -4624,7 +4633,16 @@ void ASTWriter::GenerateSpecializationInfoLookupTable(
     Generator.insert(HashValue, Trait.getData(Specs, ExisitingSpecs), Trait);
   }
 
-  Generator.emit(LookupTable, Trait, Lookups ? &Lookups->Table : nullptr);
+  // Reduced BMI may not emit everything in the lookup table,
+  // If Reduced BMI **partially** emits some decls,
+  // then the generator may not emit the corresponding entry for the
+  // corresponding name is already there. See
+  // MultiOnDiskHashTableGenerator::insert and
+  // MultiOnDiskHashTableGenerator::emit for details.
+  // So we won't emit the lookup table if we're generating reduced BMI.
+  auto *ToEmitMaybeMergedLookupTable =
+      (!isGeneratingReducedBMI() && Lookups) ? &Lookups->Table : nullptr;
+  Generator.emit(LookupTable, Trait, ToEmitMaybeMergedLookupTable);
 }
 
 uint64_t ASTWriter::WriteSpecializationInfoLookupTable(
@@ -4821,7 +4839,16 @@ void ASTWriter::GenerateNameLookupTable(
   // Create the on-disk hash table. Also emit the existing imported and
   // merged table if there is one.
   auto *Lookups = Chain ? Chain->getLoadedLookupTables(DC) : nullptr;
-  Generator.emit(LookupTable, Trait, Lookups ? &Lookups->Table : nullptr);
+  // Reduced BMI may not emit everything in the lookup table,
+  // If Reduced BMI **partially** emits some decls,
+  // then the generator may not emit the corresponding entry for the
+  // corresponding name is already there. See
+  // MultiOnDiskHashTableGenerator::insert and
+  // MultiOnDiskHashTableGenerator::emit for details.
+  // So we won't emit the lookup table if we're generating reduced BMI.
+  auto *ToEmitMaybeMergedLookupTable =
+      (!isGeneratingReducedBMI() && Lookups) ? &Lookups->Table : nullptr;
+  Generator.emit(LookupTable, Trait, ToEmitMaybeMergedLookupTable);
 
   const auto &ModuleLocalDecls = Trait.getModuleLocalDecls();
   if (!ModuleLocalDecls.empty()) {
@@ -4837,11 +4864,15 @@ void ASTWriter::GenerateNameLookupTable(
                                         ModuleLocalTrait);
     }
 
+    // See the above comment. We won't emit the merged table if we're generating
+    // reduced BMI.
     auto *ModuleLocalLookups =
-        Chain ? Chain->getModuleLocalLookupTables(DC) : nullptr;
-    ModuleLocalLookupGenerator.emit(
-        ModuleLocalLookupTable, ModuleLocalTrait,
-        ModuleLocalLookups ? &ModuleLocalLookups->Table : nullptr);
+        (isGeneratingReducedBMI() && Chain &&
+         Chain->getModuleLocalLookupTables(DC))
+            ? &Chain->getModuleLocalLookupTables(DC)->Table
+            : nullptr;
+    ModuleLocalLookupGenerator.emit(ModuleLocalLookupTable, ModuleLocalTrait,
+                                    ModuleLocalLookups);
   }
 
   const auto &TULocalDecls = Trait.getTULocalDecls();
@@ -4857,9 +4888,13 @@ void ASTWriter::GenerateNameLookupTable(
       TULookupGenerator.insert(Key, TULocalTrait.getData(IDs), TULocalTrait);
     }
 
-    auto *TULocalLookups = Chain ? Chain->getTULocalLookupTables(DC) : nullptr;
-    TULookupGenerator.emit(TULookupTable, TULocalTrait,
-                           TULocalLookups ? &TULocalLookups->Table : nullptr);
+    // See the above comment. We won't emit the merged table if we're generating
+    // reduced BMI.
+    auto *TULocalLookups =
+        (isGeneratingReducedBMI() && Chain && Chain->getTULocalLookupTables(DC))
+            ? &Chain->getTULocalLookupTables(DC)->Table
+            : nullptr;
+    TULookupGenerator.emit(TULookupTable, TULocalTrait, TULocalLookups);
   }
 }
 
diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index 7646d5d5efe00..22c754033a0d2 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -2234,8 +2234,15 @@ void ASTDeclWriter::VisitRedeclarable(Redeclarable<T> *D) {
       // redecl chain.
       unsigned I = Record.size();
       Record.push_back(0);
-      if (Writer.Chain)
-        AddFirstDeclFromEachModule(DAsT, /*IncludeLocal*/false);
+      if (Writer.Chain) {
+        // Namespace can have many redeclaration in many TU.
+        // We try to avoid touching every redeclaration to try to
+        // reduce the dependencies as much as possible.
+        if (!isa<NamespaceDecl>(DAsT))
+          AddFirstDeclFromEachModule(DAsT, /*IncludeLocal*/ false);
+        else
+          Record.AddDeclRef(nullptr);
+      }
       // This is the number of imported first declarations + 1.
       Record[I] = Record.size() - I;
 
@@ -2265,8 +2272,10 @@ void ASTDeclWriter::VisitRedeclarable(Redeclarable<T> *D) {
     //
     // FIXME: This is not correct; when we reach an imported declaration we
     // won't emit its previous declaration.
-    (void)Writer.GetDeclRef(D->getPreviousDecl());
-    (void)Writer.GetDeclRef(MostRecent);
+    if (D->getPreviousDecl() && !D->getPreviousDecl()->isFromASTFile())
+      (void)Writer.GetDeclRef(D->getPreviousDecl());
+    if (!MostRecent->isFromASTFile())
+      (void)Writer.GetDeclRef(MostRecent);
   } else {
     // We use the sentinel value 0 to indicate an only declaration.
     Record.push_back(0);
diff --git a/clang/test/Modules/no-transitive-change-namespace.cppm b/clang/test/Modules/no-transitive-change-namespace.cppm
new file mode 100644
index 0000000000000..f67dd8533a6df
--- /dev/null
+++ b/clang/test/Modules/no-transitive-change-namespace.cppm
@@ -0,0 +1,44 @@
+// Test that adding a new decl in a module which originally only contained a namespace
+// won't break the dependency.
+//
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/Root.cppm -o %t/Root.pcm
+// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/N.cppm -o %t/N.pcm \
+// RUN:     -fmodule-file=Root=%t/Root.pcm
+// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/N.v1.cppm -o %t/N.v1.pcm \
+// RUN:     -fmodule-file=Root=%t/Root.pcm
+// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/M.cppm -o %t/M.pcm \
+// RUN:     -fmodule-file=N=%t/N.pcm -fmodule-file=Root=%t/Root.pcm
+// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/M.cppm -o %t/M.v1.pcm \
+// RUN:     -fmodule-file=N=%t/N.v1.pcm -fmodule-file=Root=%t/Root.pcm
+//
+// RUN: diff %t/M.pcm %t/M.v1.pcm &> /dev/null
+
+//--- Root.cppm
+export module Root;
+export namespace N {
+
+}
+
+//--- N.cppm
+export module N;
+import Root;
+export namespace N {
+
+}
+
+//--- N.v1.cppm
+export module N;
+import Root;
+export namespace N {
+struct NN {};
+}
+
+//--- M.cppm
+export module M;
+import N;
+export namespace N {
+struct MM {};
+}
diff --git a/clang/test/Modules/no-transitive-decl-change-4.cppm b/clang/test/Modules/no-transitive-decl-change-4.cppm
index 944878be8df63..bb7e47f32669e 100644
--- a/clang/test/Modules/no-transitive-decl-change-4.cppm
+++ b/clang/test/Modules/no-transitive-decl-change-4.cppm
@@ -27,7 +27,7 @@
 // RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/B.cppm -o %t/B.v1.pcm \
 // RUN:     -fprebuilt-module-path=%t -fmodule-file=AWrapper=%t/AWrapper.v1.pcm -fmodule-file=A=%t/A.v1.pcm
 //
-// RUN: not diff %t/B.pcm %t/B.v1.pcm &> /dev/null
+// RUN: diff %t/B.pcm %t/B.v1.pcm &> /dev/null
 
 //--- T.cppm
 export module T;
diff --git a/clang/test/Modules/pr184957.cppm b/clang/test/Modules/pr184957.cppm
new file mode 100644
index 0000000000000..c08c10d978938
--- /dev/null
+++ b/clang/test/Modules/pr184957.cppm
@@ -0,0 +1,216 @@
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: split-file %s %t
+// RUN: mkdir %t/tmp
+//
+// RUN: %clang_cc1 -std=c++23 -nostdinc++ -I %t/std %t/wrap.std.tt.cppm -emit-reduced-module-interface -o %t/wrap.std.tt.pcm
+// RUN: %clang_cc1 -std=c++23 -nostdinc++ -I %t/std %t/wrap.std.vec.cppm -emit-reduced-module-interface -o %t/wrap.std.vec.pcm
+// RUN: %clang_cc1 -std=c++23 -nostdinc++ -I %t/std %t/not_std.cppm -emit-reduced-module-interface -o %t/not_std.pcm
+// RUN: %clang_cc1 -std=c++23 -nostdinc++ -I %t/std %t/wrap.std.tt2.cppm -emit-reduced-module-interface -o %t/wrap.std.tt2.pcm
+// RUN: %clang_cc1 -std=c++23 -nostdinc++ -I %t/std -fmodule-file=wrap.std.vec=%t/wrap.std.vec.pcm %t/wrap.std.vec.reexport.cppm -emit-reduced-module-interface -o %t/wrap.std.vec.reexport.pcm
+// RUN: %clang_cc1 -std=c++23 -nostdinc++ -I %t/std -fmodule-file=wrap.std.tt=%t/wrap.std.tt.pcm -fmodule-file=wrap.std.vec=%t/wrap.std.vec.pcm -fmodule-file=wrap.std.vec.reexport=%t/wrap.std.vec.reexport.pcm -fmodule-file=wrap.std.tt2=%t/wrap.std.tt2.pcm -fmodule-file=not_std=%t/not_std.pcm %t/k.repro_dep.cppm -emit-reduced-module-interface -o %t/k.repro_dep.pcm
+// RUN: %clang_cc1 -std=c++23 -nostdinc++ -I %t/std -fmodule-file=wrap.std.tt=%t/wrap.std.tt.pcm -fmodule-file=wrap.std.vec=%t/wrap.std.vec.pcm -fmodule-file=wrap.std.tt2=%t/wrap.std.tt2.pcm -fmodule-file=wrap.std.vec.reexport=%t/wrap.std.vec.reexport.pcm -fmodule-file=not_std=%t/not_std.pcm -fmodule-file=k.repro_dep=%t/k.repro_dep.pcm %t/k.repro.cxx -fsyntax-only -verify
+
+//--- std/allocator.h
+#ifndef _LIBCPP___MEMORY_ALLOCATOR_H
+#define _LIBCPP___MEMORY_ALLOCATOR_H
+
+#include <size_t.h>
+
+namespace std {
+
+enum align_val_t { __zero = 0, __max = (size_t)-1 };
+
+template <class _Tp>
+inline _Tp*
+__libcpp_allocate(size_t __n, [[__maybe_unused__]] size_t __align = 8) {
+  size_t __size = static_cast<size_t>(__n) * sizeof(_Tp);
+  return static_cast<_Tp*>(__builtin_operator_new(__size, static_cast<align_val_t>(__align)));
+}
+
+template <class _Tp>
+class allocator
+{
+public:
+  typedef _Tp value_type;
+
+  [[__nodiscard__]] _Tp* allocate(size_t __n) {
+    if (__builtin_is_constant_evaluated()) {
+      return static_cast<_Tp*>(::operator new(__n * sizeof(_Tp)));
+    } else {
+      return std::__libcpp_allocate<_Tp>(size_t(__n));
+    }
+  }
+};
+
+template <class _Tp, class _Up>
+inline bool
+operator==(const allocator<_Tp>&, const allocator<_Up>&) noexcept {
+  return true;
+}
+
+}
+
+#endif // _LIBCPP___MEMORY_ALLOCATOR_H
+
+//--- std/sys_wchar.h
+#ifndef _WCHAR_H
+#define _WCHAR_H 1
+
+#ifndef __FILE_defined
+  #define __FILE_defined 1
+
+  struct _IO_FILE;
+
+  typedef struct _IO_FILE FILE;
+#endif
+
+#endif /* wchar.h  */
+
+//--- std/char_traits.h
+#ifndef _LIBCPP___STRING_CHAR_TRAITS_H
+#define _LIBCPP___STRING_CHAR_TRAITS_H
+
+namespace std {
+
+template <class _Tp>
+inline size_t constexpr __constexpr_strlen(const _Tp* __str) noexcept {
+  if (__builtin_is_constant_evaluated()) {
+    size_t __i = 0;
+    for (; __str[__i] != '\0'; ++__i)
+      ;
+    return __i;
+  }
+  return 0;
+}
+
+template <class _CharT>
+struct char_traits;
+
+template <>
+struct char_traits<char> {
+  using char_type  = char;
+  
+  [[__nodiscard__]] static inline size_t constexpr
+  length(const char_type* __s) noexcept {
+    return std::__constexpr_strlen(__s);
+  }
+};
+
+}
+
+#endif // _LIBCPP___STRING_CHAR_TRAITS_H
+
+//--- std/type_traits
+#ifndef _LIBCPP_TYPE_TRAITS
+#define _LIBCPP_TYPE_TRAITS
+
+namespace std
+{}
+
+#endif // _LIBCPP_TYPE_TRAITS
+
+//--- std/ptrdiff_t.h
+#ifndef _LIBCPP___CSTDDEF_PTRDIFF_T_H
+#define _LIBCPP___CSTDDEF_PTRDIFF_T_H
+
+namespace std {
+
+using ptrdiff_t = decltype(static_cast<int*>(nullptr) - static_cast<int*>(nullptr));
+
+}
+
+#endif // _LIBCPP___CSTDDEF_PTRDIFF_T_H
+
+//--- std/size_t.h
+#ifndef _LIBCPP___CSTDDEF_SIZE_T_H
+#define _LIBCPP___CSTDDEF_SIZE_T_H
+
+namespace std {
+
+using size_t = decltype(sizeof(int));
+
+}
+
+#endif // _LIBCPP___CSTDDEF_SIZE_T_H
+
+//--- wrap.std.tt.cppm
+module;
+
+#include <type_traits>
+
+export module wrap.std.tt;
+
+//--- wrap.std.vec.cppm
+module;
+
+#include <allocator.h>
+#include <sys_wchar.h>
+
+export module wrap.std.vec;
+
+//--- not_std.cppm
+module;
+
+#include <allocator.h>
+
+
+export module not_std;
+
+namespace std {
+  template <class _Tp, class _Allocator>
+  class __split_buffer {
+  public:
+    __split_buffer() {
+      _Allocator a;
+      (void)a.allocate(1);
+    }
+  };
+}
+
+export namespace std {
+  template <typename T>
+  using problem = std::__split_buffer<T, std::allocator<T>>;
+}
+
+export using ::operator new;
+
+//--- wrap.std.tt2.cppm
+module;
+
+#include <type_traits>
+
+export module wrap.std.tt2;
+
+//--- wrap.std.vec.reexport.cppm
+export module wrap.std.vec.reexport;
+
+export import wrap.std.vec;
+
+//--- k.repro_dep.cppm
+module;
+
+#include <allocator.h>
+#include <sys_wchar.h>
+#include <char_traits.h>
+
+export module k.repro_dep;
+
+import wrap.std.tt;
+import wrap.std.vec.reexport;
+import wrap.std.vec;
+import wrap.std.tt2;
+
+import not_std;
+
+using XYZ = std::char_traits<char>;
+
+//--- k.repro.cxx
+// expected-no-diagnostics
+import not_std;
+import k.repro_dep;
+
+auto f() -> void
+{
+  std::problem< int > x;
+}



More information about the cfe-commits mailing list