[clang] [clang] load umbrella dir headers in sorted order (PR #156108)

Richard Howell via cfe-commits cfe-commits at lists.llvm.org
Fri Aug 29 14:25:43 PDT 2025


https://github.com/rmaz created https://github.com/llvm/llvm-project/pull/156108

Clang modules sort the umbrella dir headers by name before adding to the
module's includes to ensure deterministic output across different file systems.
This is insufficient, however, as the header search table is also serialized.
That table includes all the loaded headers by file reference, and file
references are allocated incrementally. To ensure stable output we also have
to create the file references in sorted order.

>From cd8c7a2327fc6c5b9870eeefc24c6d8115521319 Mon Sep 17 00:00:00 2001
From: Richard Howell <rhow at meta.com>
Date: Fri, 29 Aug 2025 14:00:05 -0700
Subject: [PATCH] [clang] load umbrella dir headers in sorted order

Clang modules sort the umbrella dir headers by name before adding
to the module's includes to ensure deterministic output across
different file systems. This is insufficient, however, as the header
search table is also serialized. That table includes all the loaded
headers by file reference, and file references are allocated
incrementally. To ensure stable output we also have to create the
file references in sorted order.
---
 clang/lib/Frontend/FrontendAction.cpp         | 40 ++++++++++---------
 .../umbrella_header_order/module.modulemap    |  3 ++
 .../Inputs/umbrella_header_order/umbrella/A.h |  0
 .../Inputs/umbrella_header_order/umbrella/B.h |  0
 .../Inputs/umbrella_header_order/umbrella/C.h |  0
 .../Inputs/umbrella_header_order/umbrella/D.h |  0
 .../Inputs/umbrella_header_order/umbrella/E.h |  0
 .../Inputs/umbrella_header_order/umbrella/F.h |  0
 clang/test/Modules/umbrella_dir_order.m       | 11 +++++
 9 files changed, 35 insertions(+), 19 deletions(-)
 create mode 100644 clang/test/Modules/Inputs/umbrella_header_order/module.modulemap
 create mode 100644 clang/test/Modules/Inputs/umbrella_header_order/umbrella/A.h
 create mode 100644 clang/test/Modules/Inputs/umbrella_header_order/umbrella/B.h
 create mode 100644 clang/test/Modules/Inputs/umbrella_header_order/umbrella/C.h
 create mode 100644 clang/test/Modules/Inputs/umbrella_header_order/umbrella/D.h
 create mode 100644 clang/test/Modules/Inputs/umbrella_header_order/umbrella/E.h
 create mode 100644 clang/test/Modules/Inputs/umbrella_header_order/umbrella/F.h
 create mode 100644 clang/test/Modules/umbrella_dir_order.m

diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index a7d6a068fe2d0..6b1fcac75ac2b 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -623,7 +623,7 @@ static std::error_code collectModuleHeaderIncludes(
     llvm::sys::path::native(UmbrellaDir->Entry.getName(), DirNative);
 
     llvm::vfs::FileSystem &FS = FileMgr.getVirtualFileSystem();
-    SmallVector<std::pair<std::string, FileEntryRef>, 8> Headers;
+    SmallVector<std::pair<std::string, std::string>, 8> HeaderPaths;
     for (llvm::vfs::recursive_directory_iterator Dir(FS, DirNative, EC), End;
          Dir != End && !EC; Dir.increment(EC)) {
       // Check whether this entry has an extension typically associated with
@@ -633,17 +633,6 @@ static std::error_code collectModuleHeaderIncludes(
                .Default(false))
         continue;
 
-      auto Header = FileMgr.getOptionalFileRef(Dir->path());
-      // FIXME: This shouldn't happen unless there is a file system race. Is
-      // that worth diagnosing?
-      if (!Header)
-        continue;
-
-      // If this header is marked 'unavailable' in this module, don't include
-      // it.
-      if (ModMap.isHeaderUnavailableInModule(*Header, Module))
-        continue;
-
       // Compute the relative path from the directory to this file.
       SmallVector<StringRef, 16> Components;
       auto PathIt = llvm::sys::path::rbegin(Dir->path());
@@ -655,20 +644,33 @@ static std::error_code collectModuleHeaderIncludes(
            ++It)
         llvm::sys::path::append(RelativeHeader, *It);
 
-      std::string RelName = RelativeHeader.c_str();
-      Headers.push_back(std::make_pair(RelName, *Header));
+      HeaderPaths.push_back(
+          std::make_pair(Dir->path().str(), RelativeHeader.c_str()));
     }
 
     if (EC)
       return EC;
 
     // Sort header paths and make the header inclusion order deterministic
-    // across different OSs and filesystems.
-    llvm::sort(Headers, llvm::less_first());
-    for (auto &H : Headers) {
+    // across different OSs and filesystems. As the header search table
+    // serialization order depends on the file reference UID, we need to create
+    // file references in deterministic order too.
+    llvm::sort(HeaderPaths, llvm::less_first());
+    for (auto &[Path, RelPath] : HeaderPaths) {
+      auto Header = FileMgr.getOptionalFileRef(Path);
+      // FIXME: This shouldn't happen unless there is a file system race. Is
+      // that worth diagnosing?
+      if (!Header)
+        continue;
+
+      // If this header is marked 'unavailable' in this module, don't include
+      // it.
+      if (ModMap.isHeaderUnavailableInModule(*Header, Module))
+        continue;
+
       // Include this header as part of the umbrella directory.
-      Module->addTopHeader(H.second);
-      addHeaderInclude(H.first, Includes, LangOpts, Module->IsExternC);
+      Module->addTopHeader(*Header);
+      addHeaderInclude(RelPath, Includes, LangOpts, Module->IsExternC);
     }
   }
 
diff --git a/clang/test/Modules/Inputs/umbrella_header_order/module.modulemap b/clang/test/Modules/Inputs/umbrella_header_order/module.modulemap
new file mode 100644
index 0000000000000..5c64e33068822
--- /dev/null
+++ b/clang/test/Modules/Inputs/umbrella_header_order/module.modulemap
@@ -0,0 +1,3 @@
+module x {
+    umbrella "umbrella"
+}
\ No newline at end of file
diff --git a/clang/test/Modules/Inputs/umbrella_header_order/umbrella/A.h b/clang/test/Modules/Inputs/umbrella_header_order/umbrella/A.h
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Modules/Inputs/umbrella_header_order/umbrella/B.h b/clang/test/Modules/Inputs/umbrella_header_order/umbrella/B.h
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Modules/Inputs/umbrella_header_order/umbrella/C.h b/clang/test/Modules/Inputs/umbrella_header_order/umbrella/C.h
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Modules/Inputs/umbrella_header_order/umbrella/D.h b/clang/test/Modules/Inputs/umbrella_header_order/umbrella/D.h
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Modules/Inputs/umbrella_header_order/umbrella/E.h b/clang/test/Modules/Inputs/umbrella_header_order/umbrella/E.h
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Modules/Inputs/umbrella_header_order/umbrella/F.h b/clang/test/Modules/Inputs/umbrella_header_order/umbrella/F.h
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Modules/umbrella_dir_order.m b/clang/test/Modules/umbrella_dir_order.m
new file mode 100644
index 0000000000000..179a59eb94609
--- /dev/null
+++ b/clang/test/Modules/umbrella_dir_order.m
@@ -0,0 +1,11 @@
+// RUN: cd %S
+// RUN: %clang_cc1 -fmodules -fno-implicit-modules -x objective-c -fmodule-name=x -emit-module Inputs/umbrella_header_order/module.modulemap -o %t/mod.pcm
+// RUN: llvm-bcanalyzer --dump --disable-histogram %t/mod.pcm | FileCheck %s
+
+// CHECK: <INPUT_FILE abbrevid=4 op0=1 op1=36 op2=0 op3=0 op4=0 op5=1 op6=1 op7=16/> blob data = 'module.modulemap'
+// CHECK: <INPUT_FILE abbrevid=4 op0=2 op1=0 op2=0 op3=0 op4=0 op5=0 op6=0 op7=12/> blob data = 'umbrella/A.h'
+// CHECK: <INPUT_FILE abbrevid=4 op0=3 op1=0 op2=0 op3=0 op4=0 op5=0 op6=0 op7=12/> blob data = 'umbrella/B.h'
+// CHECK: <INPUT_FILE abbrevid=4 op0=4 op1=0 op2=0 op3=0 op4=0 op5=0 op6=0 op7=12/> blob data = 'umbrella/C.h'
+// CHECK: <INPUT_FILE abbrevid=4 op0=5 op1=0 op2=0 op3=0 op4=0 op5=0 op6=0 op7=12/> blob data = 'umbrella/D.h'
+// CHECK: <INPUT_FILE abbrevid=4 op0=6 op1=0 op2=0 op3=0 op4=0 op5=0 op6=0 op7=12/> blob data = 'umbrella/E.h'
+// CHECK: <INPUT_FILE abbrevid=4 op0=7 op1=0 op2=0 op3=0 op4=0 op5=0 op6=0 op7=12/> blob data = 'umbrella/F.h'



More information about the cfe-commits mailing list