[clang] [clang][DependencyScanning] Track modules that resolve from sysroot. (PR #130634)

Cyndy Ishida via cfe-commits cfe-commits at lists.llvm.org
Mon Mar 10 20:04:05 PDT 2025


https://github.com/cyndyishida updated https://github.com/llvm/llvm-project/pull/130634

>From aca254a154489fda68292f6d06a866ae7011a7f6 Mon Sep 17 00:00:00 2001
From: Cyndy Ishida <cyndy_ishida at apple.com>
Date: Mon, 10 Mar 2025 09:06:32 -0700
Subject: [PATCH 1/2] [clang][DependencyScanning] Track modules that resolve
 from sysroot.

This patch tracks whether all the file & module dependencies of a module
resolve to a sysroot location. This information will later be queried by
build systems for determining where to store the accompanying pcms.
---
 .../DependencyScanning/ModuleDepCollector.h   |   7 ++
 .../DependencyScanning/ModuleDepCollector.cpp |  25 +++-
 clang/test/ClangScanDeps/modules-in-sysroot.c | 107 ++++++++++++++++++
 clang/tools/clang-scan-deps/ClangScanDeps.cpp |   2 +
 4 files changed, 139 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/ClangScanDeps/modules-in-sysroot.c

diff --git a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
index 20fb4de6a2a73..6187f0168e6d9 100644
--- a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
+++ b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
@@ -114,6 +114,10 @@ struct ModuleDeps {
   /// Whether this is a "system" module.
   bool IsSystem;
 
+  /// Whether this is a module where its dependencies resolve within the
+  /// sysroot.
+  bool IsInSysroot;
+
   /// The path to the modulemap file which defines this module.
   ///
   /// This can be used to explicitly build this module. This file will
@@ -219,6 +223,9 @@ class ModuleDepCollectorPP final : public PPCallbacks {
                               llvm::DenseSet<const Module *> &AddedModules);
   void addAffectingClangModule(const Module *M, ModuleDeps &MD,
                           llvm::DenseSet<const Module *> &AddedModules);
+
+  /// Add discovered module dependency for the given module.
+  void addClangModule(const Module *M, const ModuleID ID, ModuleDeps &MD);
 };
 
 /// Collects modular and non-modular dependencies of the main file by attaching
diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
index 36b75c1016cd8..86eda34472cf0 100644
--- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -698,6 +698,15 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
 
   MD.ID.ModuleName = M->getFullModuleName();
   MD.IsSystem = M->IsSystem;
+
+  // Start off with the assumption that this module is in the sysroot when there
+  // is a sysroot provided. As more dependencies are discovered, check if those
+  // come from the provided sysroot.
+  const StringRef CurrSysroot = MDC.ScanInstance.getHeaderSearchOpts().Sysroot;
+  MD.IsInSysroot =
+      !CurrSysroot.empty() &&
+      (llvm::sys::path::root_directory(CurrSysroot) != CurrSysroot);
+
   // For modules which use export_as link name, the linked product that of the
   // corresponding export_as-named module.
   if (!M->UseExportAsModuleLinkName)
@@ -739,6 +748,11 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
   MDC.ScanInstance.getASTReader()->visitInputFileInfos(
       *MF, /*IncludeSystem=*/true,
       [&](const serialization::InputFileInfo &IFI, bool IsSystem) {
+        auto FullFilePath = ASTReader::ResolveImportedPath(
+            PathBuf, IFI.UnresolvedImportedFilename, MF->BaseDirectory);
+        if (MD.IsInSysroot)
+          MD.IsInSysroot = FullFilePath->starts_with(CurrSysroot);
+        PathBuf.resize_for_overwrite(256);
         if (!(IFI.TopLevel && IFI.ModuleMap))
           return;
         if (IFI.UnresolvedImportedFilenameAsRequested.ends_with(
@@ -835,6 +849,13 @@ void ModuleDepCollectorPP::addAllSubmoduleDeps(
   });
 }
 
+void ModuleDepCollectorPP::addClangModule(const Module *M, const ModuleID ID,
+                                          ModuleDeps &MD) {
+  MD.ClangModuleDeps.push_back(ID);
+  if (MD.IsInSysroot)
+    MD.IsInSysroot = MDC.ModularDeps[M]->IsInSysroot;
+}
+
 void ModuleDepCollectorPP::addModuleDep(
     const Module *M, ModuleDeps &MD,
     llvm::DenseSet<const Module *> &AddedModules) {
@@ -843,7 +864,7 @@ void ModuleDepCollectorPP::addModuleDep(
         !MDC.isPrebuiltModule(Import)) {
       if (auto ImportID = handleTopLevelModule(Import->getTopLevelModule()))
         if (AddedModules.insert(Import->getTopLevelModule()).second)
-          MD.ClangModuleDeps.push_back(*ImportID);
+          addClangModule(Import->getTopLevelModule(), *ImportID, MD);
     }
   }
 }
@@ -867,7 +888,7 @@ void ModuleDepCollectorPP::addAffectingClangModule(
         !MDC.isPrebuiltModule(Affecting)) {
       if (auto ImportID = handleTopLevelModule(Affecting))
         if (AddedModules.insert(Affecting).second)
-          MD.ClangModuleDeps.push_back(*ImportID);
+          addClangModule(Affecting, *ImportID, MD);
     }
   }
 }
diff --git a/clang/test/ClangScanDeps/modules-in-sysroot.c b/clang/test/ClangScanDeps/modules-in-sysroot.c
new file mode 100644
index 0000000000000..d96aa69c0e8f4
--- /dev/null
+++ b/clang/test/ClangScanDeps/modules-in-sysroot.c
@@ -0,0 +1,107 @@
+// This test verifies that modules composed entirely of sysroot inputs are captured in
+// dependency information.
+
+// The first compilation verifies that transitive dependencies on non-sysroot input are captured.
+// The second compilation verifies that external paths are resolved when a vfsoverlay is applied when considering sysroot-ness.
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s|DIR|%/t|g" %t/compile-commands.json.in > %t/compile-commands.json
+// RUN: sed -e "s|DIR|%/t|g" %t/overlay.json.template > %t/overlay.json
+// RUN: clang-scan-deps -compilation-database %t/compile-commands.json \
+// RUN:   -j 1 -format experimental-full > %t/deps.db
+// RUN: cat %t/deps.db | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t
+
+// CHECK:   "modules": [
+// CHECK-NEXT:     {
+// CHECK:            "is-in-sysroot": true,
+// CHECK:            "name": "A"
+
+// Verify that there are no more occurrences of sysroot.
+// CHECK-NOT:            "is-in-sysroot"
+
+// CHECK:            "name": "A"
+// CHECK:            "USE_VFS"
+// CHECK:            "name": "B"
+// CHECK:            "name": "C"
+// CHECK:            "name": "D"
+// CHECK:            "name": "NotInSDK"
+
+//--- compile-commands.json.in
+[
+{
+  "directory": "DIR",
+  "command": "clang -c DIR/client.m -isysroot DIR/MacOSX.sdk -I DIR/BuildDir -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
+  "file": "DIR/client.m"
+},
+{
+  "directory": "DIR",
+  "command": "clang -c DIR/client.m -isysroot DIR/MacOSX.sdk  -ivfsoverlay DIR/overlay.json -DUSE_VFS -I DIR/BuildDir -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
+  "file": "DIR/client.m"
+}
+]
+
+//--- overlay.json.template
+{
+  "version": 0,
+  "case-sensitive": "false",
+  "roots": [
+    {
+          "external-contents": "DIR/local/A/A_vfs.h",
+          "name": "DIR/MacOSX.sdk/usr/include/A/A_vfs.h",
+          "type": "file"
+    }
+  ]
+}
+
+//--- MacOSX.sdk/usr/include/A/module.modulemap
+module A {
+  umbrella "."
+}
+
+//--- MacOSX.sdk/usr/include/A/A.h
+#ifdef USE_VFS
+#include <A/A_vfs.h>
+#endif 
+typedef int A_t;
+
+//--- local/A/A_vfs.h
+typedef int typeFromVFS;
+
+//--- MacOSX.sdk/usr/include/B/module.modulemap
+module B [system] {
+  umbrella "."
+}
+
+//--- MacOSX.sdk/usr/include/B/B.h
+#include <C/C.h>
+typedef int B_t;
+
+//--- MacOSX.sdk/usr/include/C/module.modulemap
+module C [system] {
+  umbrella "."
+}
+
+//--- MacOSX.sdk/usr/include/C/C.h
+#include <D/D.h>
+
+//--- MacOSX.sdk/usr/include/D/module.modulemap
+module D [system] {
+  umbrella "."
+}
+
+// Simulate a header that will be resolved in a local directory, from a sysroot header.
+//--- MacOSX.sdk/usr/include/D/D.h
+#include <HeaderNotFoundInSDK.h>
+
+//--- BuildDir/module.modulemap
+module NotInSDK [system] {
+  umbrella "."
+}
+
+//--- BuildDir/HeaderNotFoundInSDK.h
+typedef int local_t;
+
+//--- client.m
+#include <A/A.h>
+#include <B/B.h>
diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index 3bdeb461e4bfa..f5946b30fb84d 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -471,6 +471,8 @@ class FullDeps {
         for (auto &&ModID : ModuleIDs) {
           auto &MD = Modules[ModID];
           JOS.object([&] {
+            if (MD.IsInSysroot)
+              JOS.attribute("is-in-sysroot", MD.IsInSysroot);
             JOS.attributeArray("clang-module-deps",
                                toJSONSorted(JOS, MD.ClangModuleDeps));
             JOS.attribute("clang-modulemap-file",

>From fd4abaa6c8097694fe1fad2510beb6b11653f01f Mon Sep 17 00:00:00 2001
From: Cyndy Ishida <cyndy_ishida at apple.com>
Date: Mon, 10 Mar 2025 13:39:42 -0700
Subject: [PATCH 2/2] Add explicit search paths into SDK as they aren't passed
 with non-darwin targets

---
 clang/test/ClangScanDeps/modules-in-sysroot.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/clang/test/ClangScanDeps/modules-in-sysroot.c b/clang/test/ClangScanDeps/modules-in-sysroot.c
index d96aa69c0e8f4..2ecf9c293b23d 100644
--- a/clang/test/ClangScanDeps/modules-in-sysroot.c
+++ b/clang/test/ClangScanDeps/modules-in-sysroot.c
@@ -4,6 +4,7 @@
 // The first compilation verifies that transitive dependencies on non-sysroot input are captured.
 // The second compilation verifies that external paths are resolved when a vfsoverlay is applied when considering sysroot-ness.
 
+// REQUIRES: shell
 // RUN: rm -rf %t
 // RUN: split-file %s %t
 // RUN: sed -e "s|DIR|%/t|g" %t/compile-commands.json.in > %t/compile-commands.json
@@ -31,13 +32,13 @@
 [
 {
   "directory": "DIR",
-  "command": "clang -c DIR/client.m -isysroot DIR/MacOSX.sdk -I DIR/BuildDir -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
-  "file": "DIR/client.m"
+  "command": "clang -c DIR/client.c -isysroot DIR/MacOSX.sdk -IDIR/MacOSX.sdk/usr/include -IDIR/BuildDir -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
+  "file": "DIR/client.c"
 },
 {
   "directory": "DIR",
-  "command": "clang -c DIR/client.m -isysroot DIR/MacOSX.sdk  -ivfsoverlay DIR/overlay.json -DUSE_VFS -I DIR/BuildDir -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
-  "file": "DIR/client.m"
+  "command": "clang -c DIR/client.c -isysroot DIR/MacOSX.sdk -IDIR/MacOSX.sdk/usr/include -ivfsoverlay DIR/overlay.json -DUSE_VFS -IDIR/BuildDir -fmodules -fmodules-cache-path=DIR/module-cache -fimplicit-module-maps",
+  "file": "DIR/client.c"
 }
 ]
 
@@ -102,6 +103,6 @@ module NotInSDK [system] {
 //--- BuildDir/HeaderNotFoundInSDK.h
 typedef int local_t;
 
-//--- client.m
+//--- client.c
 #include <A/A.h>
 #include <B/B.h>



More information about the cfe-commits mailing list