[clang] 8eb0dfe - [Serialization] [ASTWriter] Try to not record namespace as much as possible (#179178)

via cfe-commits cfe-commits at lists.llvm.org
Thu Mar 12 02:20:51 PDT 2026


Author: Chuanqi Xu
Date: 2026-03-12T09:20:46Z
New Revision: 8eb0dfe5b6d658fa9991612060a1840927fd2eea

URL: https://github.com/llvm/llvm-project/commit/8eb0dfe5b6d658fa9991612060a1840927fd2eea
DIFF: https://github.com/llvm/llvm-project/commit/8eb0dfe5b6d658fa9991612060a1840927fd2eea.diff

LOG: [Serialization] [ASTWriter] Try to not record namespace as much as possible (#179178)

See

https://clang.llvm.org/docs/StandardCPlusPlusModules.html#experimental-non-cascading-changes
for the full background.

In short, we want to cut off the dependencies to improve the
recompilation time.

And namespace is special, as the same namespace can occur in many TUs.

This patch tries to clean up unneeded reference to namespace from other
module file. The touched parts are:

- When write on disk lookup table, don't write the merged table. This
makes the ownership/dependency much cleaner. For performance, I think
the intention was to make the cost of mergin table amortized. But in our
internal workloads, I didn't observe any regression if I turn off
writing the merged table.
- When writing redeclarations, only write the ID of the first namespace
and avoid referenece to all other previous namespaces. (I don't want to
write the first namespace either... but it is more complex).
- When writing the name lookup table, try to write the local namespace.

@jtstogel @mpark I want to invite you to test this with your internal
workloads to figure out the correctness and the performance impact on
this.

I know I can make the change clean for you by inserting code guarded by
"if writing named module" but I think it will be much better if we can
make the underlying implementation more homogeneous if possible.

Added: 
    clang/test/Modules/no-transitive-change-namespace.cppm
    clang/test/Modules/pr184957.cppm

Modified: 
    clang/include/clang/Serialization/ASTWriter.h
    clang/lib/Serialization/ASTWriter.cpp
    clang/lib/Serialization/ASTWriterDecl.cpp
    clang/test/Modules/no-transitive-decl-change-4.cppm

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index 0f3993ad01693..88b62c7f4e6b4 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -928,8 +928,6 @@ class ASTWriter : public ASTDeserializationListener,
 
   void handleVTable(CXXRecordDecl *RD);
 
-  void addTouchedModuleFile(serialization::ModuleFile *);
-
 private:
   // ASTDeserializationListener implementation
   void ReaderInitialized(ASTReader *Reader) override;

diff  --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index ec718169550aa..71ea057c476e6 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -4085,10 +4085,6 @@ void ASTWriter::handleVTable(CXXRecordDecl *RD) {
   PendingEmittingVTables.push_back(RD);
 }
 
-void ASTWriter::addTouchedModuleFile(serialization::ModuleFile *MF) {
-  TouchedModuleFiles.insert(MF);
-}
-
 //===----------------------------------------------------------------------===//
 // DeclContext's Name Lookup Table Serialization
 //===----------------------------------------------------------------------===//
@@ -4133,7 +4129,6 @@ class ASTDeclContextNameLookupTraitBase {
            "have reference to loaded module file but no chain?");
 
     using namespace llvm::support;
-    Writer.addTouchedModuleFile(F);
     endian::write<uint32_t>(Out, Writer.getChain()->getModuleFileID(F),
                             llvm::endianness::little);
   }
@@ -4331,6 +4326,14 @@ static bool isTULocalInNamedModules(NamedDecl *D) {
   return D->getLinkageInternal() == Linkage::Internal;
 }
 
+static NamedDecl *getLocalRedecl(NamedDecl *D) {
+  for (auto *RD : D->redecls())
+    if (!RD->isFromASTFile())
+      return cast<NamedDecl>(RD);
+
+  return D;
+}
+
 class ASTDeclContextNameLookupTrait
     : public ASTDeclContextNameTrivialLookupTrait {
 public:
@@ -4404,6 +4407,13 @@ class ASTDeclContextNameLookupTrait
       NamedDecl *DeclForLocalLookup =
           getDeclForLocalLookup(Writer.getLangOpts(), D);
 
+      // Namespace may have many redeclarations in many TU.
+      // Also, these namespaces are symmetric. So that we try to use
+      // the local redecl if any.
+      if (isa<NamespaceDecl>(DeclForLocalLookup) &&
+          DeclForLocalLookup->isFromASTFile())
+        DeclForLocalLookup = getLocalRedecl(DeclForLocalLookup);
+
       if (Writer.getDoneWritingDeclsAndTypes() &&
           !Writer.wasDeclEmitted(DeclForLocalLookup))
         return;
@@ -4525,7 +4535,6 @@ class LazySpecializationInfoLookupTrait {
            "have reference to loaded module file but no chain?");
 
     using namespace llvm::support;
-    Writer.addTouchedModuleFile(F);
     endian::write<uint32_t>(Out, Writer.getChain()->getModuleFileID(F),
                             llvm::endianness::little);
   }
@@ -4624,7 +4633,16 @@ void ASTWriter::GenerateSpecializationInfoLookupTable(
     Generator.insert(HashValue, Trait.getData(Specs, ExisitingSpecs), Trait);
   }
 
-  Generator.emit(LookupTable, Trait, Lookups ? &Lookups->Table : nullptr);
+  // Reduced BMI may not emit everything in the lookup table,
+  // If Reduced BMI **partially** emits some decls,
+  // then the generator may not emit the corresponding entry for the
+  // corresponding name is already there. See
+  // MultiOnDiskHashTableGenerator::insert and
+  // MultiOnDiskHashTableGenerator::emit for details.
+  // So we won't emit the lookup table if we're generating reduced BMI.
+  auto *ToEmitMaybeMergedLookupTable =
+      (!isGeneratingReducedBMI() && Lookups) ? &Lookups->Table : nullptr;
+  Generator.emit(LookupTable, Trait, ToEmitMaybeMergedLookupTable);
 }
 
 uint64_t ASTWriter::WriteSpecializationInfoLookupTable(
@@ -4821,7 +4839,16 @@ void ASTWriter::GenerateNameLookupTable(
   // Create the on-disk hash table. Also emit the existing imported and
   // merged table if there is one.
   auto *Lookups = Chain ? Chain->getLoadedLookupTables(DC) : nullptr;
-  Generator.emit(LookupTable, Trait, Lookups ? &Lookups->Table : nullptr);
+  // Reduced BMI may not emit everything in the lookup table,
+  // If Reduced BMI **partially** emits some decls,
+  // then the generator may not emit the corresponding entry for the
+  // corresponding name is already there. See
+  // MultiOnDiskHashTableGenerator::insert and
+  // MultiOnDiskHashTableGenerator::emit for details.
+  // So we won't emit the lookup table if we're generating reduced BMI.
+  auto *ToEmitMaybeMergedLookupTable =
+      (!isGeneratingReducedBMI() && Lookups) ? &Lookups->Table : nullptr;
+  Generator.emit(LookupTable, Trait, ToEmitMaybeMergedLookupTable);
 
   const auto &ModuleLocalDecls = Trait.getModuleLocalDecls();
   if (!ModuleLocalDecls.empty()) {
@@ -4837,11 +4864,15 @@ void ASTWriter::GenerateNameLookupTable(
                                         ModuleLocalTrait);
     }
 
+    // See the above comment. We won't emit the merged table if we're generating
+    // reduced BMI.
     auto *ModuleLocalLookups =
-        Chain ? Chain->getModuleLocalLookupTables(DC) : nullptr;
-    ModuleLocalLookupGenerator.emit(
-        ModuleLocalLookupTable, ModuleLocalTrait,
-        ModuleLocalLookups ? &ModuleLocalLookups->Table : nullptr);
+        (isGeneratingReducedBMI() && Chain &&
+         Chain->getModuleLocalLookupTables(DC))
+            ? &Chain->getModuleLocalLookupTables(DC)->Table
+            : nullptr;
+    ModuleLocalLookupGenerator.emit(ModuleLocalLookupTable, ModuleLocalTrait,
+                                    ModuleLocalLookups);
   }
 
   const auto &TULocalDecls = Trait.getTULocalDecls();
@@ -4857,9 +4888,13 @@ void ASTWriter::GenerateNameLookupTable(
       TULookupGenerator.insert(Key, TULocalTrait.getData(IDs), TULocalTrait);
     }
 
-    auto *TULocalLookups = Chain ? Chain->getTULocalLookupTables(DC) : nullptr;
-    TULookupGenerator.emit(TULookupTable, TULocalTrait,
-                           TULocalLookups ? &TULocalLookups->Table : nullptr);
+    // See the above comment. We won't emit the merged table if we're generating
+    // reduced BMI.
+    auto *TULocalLookups =
+        (isGeneratingReducedBMI() && Chain && Chain->getTULocalLookupTables(DC))
+            ? &Chain->getTULocalLookupTables(DC)->Table
+            : nullptr;
+    TULookupGenerator.emit(TULookupTable, TULocalTrait, TULocalLookups);
   }
 }
 

diff  --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index 7646d5d5efe00..d2fb6f9e883d0 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -2234,8 +2234,15 @@ void ASTDeclWriter::VisitRedeclarable(Redeclarable<T> *D) {
       // redecl chain.
       unsigned I = Record.size();
       Record.push_back(0);
-      if (Writer.Chain)
-        AddFirstDeclFromEachModule(DAsT, /*IncludeLocal*/false);
+      if (Writer.Chain) {
+        // Namespace can have many redeclaration in many TU.
+        // We try to avoid touching every redeclaration to try to
+        // reduce the dependencies as much as possible.
+        if (!isa<NamespaceDecl>(DAsT))
+          AddFirstDeclFromEachModule(DAsT, /*IncludeLocal=*/ false);
+        else
+          Record.AddDeclRef(/*Decl=*/nullptr);
+      }
       // This is the number of imported first declarations + 1.
       Record[I] = Record.size() - I;
 
@@ -2265,8 +2272,10 @@ void ASTDeclWriter::VisitRedeclarable(Redeclarable<T> *D) {
     //
     // FIXME: This is not correct; when we reach an imported declaration we
     // won't emit its previous declaration.
-    (void)Writer.GetDeclRef(D->getPreviousDecl());
-    (void)Writer.GetDeclRef(MostRecent);
+    if (D->getPreviousDecl() && !D->getPreviousDecl()->isFromASTFile())
+      (void)Writer.GetDeclRef(D->getPreviousDecl());
+    if (!MostRecent->isFromASTFile())
+      (void)Writer.GetDeclRef(MostRecent);
   } else {
     // We use the sentinel value 0 to indicate an only declaration.
     Record.push_back(0);

diff  --git a/clang/test/Modules/no-transitive-change-namespace.cppm b/clang/test/Modules/no-transitive-change-namespace.cppm
new file mode 100644
index 0000000000000..f67dd8533a6df
--- /dev/null
+++ b/clang/test/Modules/no-transitive-change-namespace.cppm
@@ -0,0 +1,44 @@
+// Test that adding a new decl in a module which originally only contained a namespace
+// won't break the dependency.
+//
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/Root.cppm -o %t/Root.pcm
+// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/N.cppm -o %t/N.pcm \
+// RUN:     -fmodule-file=Root=%t/Root.pcm
+// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/N.v1.cppm -o %t/N.v1.pcm \
+// RUN:     -fmodule-file=Root=%t/Root.pcm
+// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/M.cppm -o %t/M.pcm \
+// RUN:     -fmodule-file=N=%t/N.pcm -fmodule-file=Root=%t/Root.pcm
+// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/M.cppm -o %t/M.v1.pcm \
+// RUN:     -fmodule-file=N=%t/N.v1.pcm -fmodule-file=Root=%t/Root.pcm
+//
+// RUN: 
diff  %t/M.pcm %t/M.v1.pcm &> /dev/null
+
+//--- Root.cppm
+export module Root;
+export namespace N {
+
+}
+
+//--- N.cppm
+export module N;
+import Root;
+export namespace N {
+
+}
+
+//--- N.v1.cppm
+export module N;
+import Root;
+export namespace N {
+struct NN {};
+}
+
+//--- M.cppm
+export module M;
+import N;
+export namespace N {
+struct MM {};
+}

diff  --git a/clang/test/Modules/no-transitive-decl-change-4.cppm b/clang/test/Modules/no-transitive-decl-change-4.cppm
index 944878be8df63..bb7e47f32669e 100644
--- a/clang/test/Modules/no-transitive-decl-change-4.cppm
+++ b/clang/test/Modules/no-transitive-decl-change-4.cppm
@@ -27,7 +27,7 @@
 // RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/B.cppm -o %t/B.v1.pcm \
 // RUN:     -fprebuilt-module-path=%t -fmodule-file=AWrapper=%t/AWrapper.v1.pcm -fmodule-file=A=%t/A.v1.pcm
 //
-// RUN: not 
diff  %t/B.pcm %t/B.v1.pcm &> /dev/null
+// RUN: 
diff  %t/B.pcm %t/B.v1.pcm &> /dev/null
 
 //--- T.cppm
 export module T;

diff  --git a/clang/test/Modules/pr184957.cppm b/clang/test/Modules/pr184957.cppm
new file mode 100644
index 0000000000000..c08c10d978938
--- /dev/null
+++ b/clang/test/Modules/pr184957.cppm
@@ -0,0 +1,216 @@
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: split-file %s %t
+// RUN: mkdir %t/tmp
+//
+// RUN: %clang_cc1 -std=c++23 -nostdinc++ -I %t/std %t/wrap.std.tt.cppm -emit-reduced-module-interface -o %t/wrap.std.tt.pcm
+// RUN: %clang_cc1 -std=c++23 -nostdinc++ -I %t/std %t/wrap.std.vec.cppm -emit-reduced-module-interface -o %t/wrap.std.vec.pcm
+// RUN: %clang_cc1 -std=c++23 -nostdinc++ -I %t/std %t/not_std.cppm -emit-reduced-module-interface -o %t/not_std.pcm
+// RUN: %clang_cc1 -std=c++23 -nostdinc++ -I %t/std %t/wrap.std.tt2.cppm -emit-reduced-module-interface -o %t/wrap.std.tt2.pcm
+// RUN: %clang_cc1 -std=c++23 -nostdinc++ -I %t/std -fmodule-file=wrap.std.vec=%t/wrap.std.vec.pcm %t/wrap.std.vec.reexport.cppm -emit-reduced-module-interface -o %t/wrap.std.vec.reexport.pcm
+// RUN: %clang_cc1 -std=c++23 -nostdinc++ -I %t/std -fmodule-file=wrap.std.tt=%t/wrap.std.tt.pcm -fmodule-file=wrap.std.vec=%t/wrap.std.vec.pcm -fmodule-file=wrap.std.vec.reexport=%t/wrap.std.vec.reexport.pcm -fmodule-file=wrap.std.tt2=%t/wrap.std.tt2.pcm -fmodule-file=not_std=%t/not_std.pcm %t/k.repro_dep.cppm -emit-reduced-module-interface -o %t/k.repro_dep.pcm
+// RUN: %clang_cc1 -std=c++23 -nostdinc++ -I %t/std -fmodule-file=wrap.std.tt=%t/wrap.std.tt.pcm -fmodule-file=wrap.std.vec=%t/wrap.std.vec.pcm -fmodule-file=wrap.std.tt2=%t/wrap.std.tt2.pcm -fmodule-file=wrap.std.vec.reexport=%t/wrap.std.vec.reexport.pcm -fmodule-file=not_std=%t/not_std.pcm -fmodule-file=k.repro_dep=%t/k.repro_dep.pcm %t/k.repro.cxx -fsyntax-only -verify
+
+//--- std/allocator.h
+#ifndef _LIBCPP___MEMORY_ALLOCATOR_H
+#define _LIBCPP___MEMORY_ALLOCATOR_H
+
+#include <size_t.h>
+
+namespace std {
+
+enum align_val_t { __zero = 0, __max = (size_t)-1 };
+
+template <class _Tp>
+inline _Tp*
+__libcpp_allocate(size_t __n, [[__maybe_unused__]] size_t __align = 8) {
+  size_t __size = static_cast<size_t>(__n) * sizeof(_Tp);
+  return static_cast<_Tp*>(__builtin_operator_new(__size, static_cast<align_val_t>(__align)));
+}
+
+template <class _Tp>
+class allocator
+{
+public:
+  typedef _Tp value_type;
+
+  [[__nodiscard__]] _Tp* allocate(size_t __n) {
+    if (__builtin_is_constant_evaluated()) {
+      return static_cast<_Tp*>(::operator new(__n * sizeof(_Tp)));
+    } else {
+      return std::__libcpp_allocate<_Tp>(size_t(__n));
+    }
+  }
+};
+
+template <class _Tp, class _Up>
+inline bool
+operator==(const allocator<_Tp>&, const allocator<_Up>&) noexcept {
+  return true;
+}
+
+}
+
+#endif // _LIBCPP___MEMORY_ALLOCATOR_H
+
+//--- std/sys_wchar.h
+#ifndef _WCHAR_H
+#define _WCHAR_H 1
+
+#ifndef __FILE_defined
+  #define __FILE_defined 1
+
+  struct _IO_FILE;
+
+  typedef struct _IO_FILE FILE;
+#endif
+
+#endif /* wchar.h  */
+
+//--- std/char_traits.h
+#ifndef _LIBCPP___STRING_CHAR_TRAITS_H
+#define _LIBCPP___STRING_CHAR_TRAITS_H
+
+namespace std {
+
+template <class _Tp>
+inline size_t constexpr __constexpr_strlen(const _Tp* __str) noexcept {
+  if (__builtin_is_constant_evaluated()) {
+    size_t __i = 0;
+    for (; __str[__i] != '\0'; ++__i)
+      ;
+    return __i;
+  }
+  return 0;
+}
+
+template <class _CharT>
+struct char_traits;
+
+template <>
+struct char_traits<char> {
+  using char_type  = char;
+  
+  [[__nodiscard__]] static inline size_t constexpr
+  length(const char_type* __s) noexcept {
+    return std::__constexpr_strlen(__s);
+  }
+};
+
+}
+
+#endif // _LIBCPP___STRING_CHAR_TRAITS_H
+
+//--- std/type_traits
+#ifndef _LIBCPP_TYPE_TRAITS
+#define _LIBCPP_TYPE_TRAITS
+
+namespace std
+{}
+
+#endif // _LIBCPP_TYPE_TRAITS
+
+//--- std/ptr
diff _t.h
+#ifndef _LIBCPP___CSTDDEF_PTRDIFF_T_H
+#define _LIBCPP___CSTDDEF_PTRDIFF_T_H
+
+namespace std {
+
+using ptr
diff _t = decltype(static_cast<int*>(nullptr) - static_cast<int*>(nullptr));
+
+}
+
+#endif // _LIBCPP___CSTDDEF_PTRDIFF_T_H
+
+//--- std/size_t.h
+#ifndef _LIBCPP___CSTDDEF_SIZE_T_H
+#define _LIBCPP___CSTDDEF_SIZE_T_H
+
+namespace std {
+
+using size_t = decltype(sizeof(int));
+
+}
+
+#endif // _LIBCPP___CSTDDEF_SIZE_T_H
+
+//--- wrap.std.tt.cppm
+module;
+
+#include <type_traits>
+
+export module wrap.std.tt;
+
+//--- wrap.std.vec.cppm
+module;
+
+#include <allocator.h>
+#include <sys_wchar.h>
+
+export module wrap.std.vec;
+
+//--- not_std.cppm
+module;
+
+#include <allocator.h>
+
+
+export module not_std;
+
+namespace std {
+  template <class _Tp, class _Allocator>
+  class __split_buffer {
+  public:
+    __split_buffer() {
+      _Allocator a;
+      (void)a.allocate(1);
+    }
+  };
+}
+
+export namespace std {
+  template <typename T>
+  using problem = std::__split_buffer<T, std::allocator<T>>;
+}
+
+export using ::operator new;
+
+//--- wrap.std.tt2.cppm
+module;
+
+#include <type_traits>
+
+export module wrap.std.tt2;
+
+//--- wrap.std.vec.reexport.cppm
+export module wrap.std.vec.reexport;
+
+export import wrap.std.vec;
+
+//--- k.repro_dep.cppm
+module;
+
+#include <allocator.h>
+#include <sys_wchar.h>
+#include <char_traits.h>
+
+export module k.repro_dep;
+
+import wrap.std.tt;
+import wrap.std.vec.reexport;
+import wrap.std.vec;
+import wrap.std.tt2;
+
+import not_std;
+
+using XYZ = std::char_traits<char>;
+
+//--- k.repro.cxx
+// expected-no-diagnostics
+import not_std;
+import k.repro_dep;
+
+auto f() -> void
+{
+  std::problem< int > x;
+}


        


More information about the cfe-commits mailing list