[clang] 074fcec - [clang][deps] Canonicalize module map path

Ben Langmuir via cfe-commits cfe-commits at lists.llvm.org
Wed Oct 5 15:46:58 PDT 2022


Author: Ben Langmuir
Date: 2022-10-05T15:42:38-07:00
New Revision: 074fcec1eabfc992c46c95df215b1caf5cf58970

URL: https://github.com/llvm/llvm-project/commit/074fcec1eabfc992c46c95df215b1caf5cf58970
DIFF: https://github.com/llvm/llvm-project/commit/074fcec1eabfc992c46c95df215b1caf5cf58970.diff

LOG: [clang][deps] Canonicalize module map path

When dep-scanning, canonicalize the module map path as much as we can.
This avoids unnecessarily needing to build multiple versions of a module
due to symlinks or case-insensitive file paths.

Despite the name `tryGetRealPathName`, the previous implementation did
not actually return the realpath most of the time, and indeed it would
be incorrect to do so since the realpath could be outside the module
directory, which would have broken finding headers relative to the
module.

Instead, use a canonicalization that is specific to the needs of
modulemap files (canonicalize the directory separately from the
filename).

Differential Revision: https://reviews.llvm.org/D134923

Added: 
    clang/test/ClangScanDeps/modules-symlink-dir.c

Modified: 
    clang/include/clang/Lex/ModuleMap.h
    clang/lib/Lex/HeaderSearch.cpp
    clang/lib/Lex/ModuleMap.cpp
    clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h
index 1084b49b3534a..10c5dfc25d9ce 100644
--- a/clang/include/clang/Lex/ModuleMap.h
+++ b/clang/include/clang/Lex/ModuleMap.h
@@ -622,6 +622,15 @@ class ModuleMap {
 
   void setInferredModuleAllowedBy(Module *M, const FileEntry *ModMap);
 
+  /// Canonicalize \p Path in a manner suitable for a module map file. In
+  /// particular, this canonicalizes the parent directory separately from the
+  /// filename so that it does not affect header resolution relative to the
+  /// modulemap.
+  ///
+  /// \returns an error code if any filesystem operations failed. In this case
+  /// \p Path is not modified.
+  std::error_code canonicalizeModuleMapPath(SmallVectorImpl<char> &Path);
+
   /// Get any module map files other than getModuleMapFileForUniquing(M)
   /// that define submodules of a top-level module \p M. This is cheaper than
   /// getting the module map file for each submodule individually, since the

diff  --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
index 99596b1378ba1..73f9678834969 100644
--- a/clang/lib/Lex/HeaderSearch.cpp
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -255,18 +255,11 @@ std::string HeaderSearch::getCachedModuleFileNameImpl(StringRef ModuleName,
     //
     // To avoid false-negatives, we form as canonical a path as we can, and map
     // to lower-case in case we're on a case-insensitive file system.
-    std::string Parent =
-        std::string(llvm::sys::path::parent_path(ModuleMapPath));
-    if (Parent.empty())
-      Parent = ".";
-    auto Dir = FileMgr.getDirectory(Parent);
-    if (!Dir)
+    SmallString<128> CanonicalPath(ModuleMapPath);
+    if (getModuleMap().canonicalizeModuleMapPath(CanonicalPath))
       return {};
-    auto DirName = FileMgr.getCanonicalName(*Dir);
-    auto FileName = llvm::sys::path::filename(ModuleMapPath);
 
-    llvm::hash_code Hash =
-      llvm::hash_combine(DirName.lower(), FileName.lower());
+    llvm::hash_code Hash = llvm::hash_combine(CanonicalPath.str().lower());
 
     SmallString<128> HashStr;
     llvm::APInt(64, size_t(Hash)).toStringUnsigned(HashStr, /*Radix*/36);

diff  --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp
index dbb81dc0d1478..cbd3303f36636 100644
--- a/clang/lib/Lex/ModuleMap.cpp
+++ b/clang/lib/Lex/ModuleMap.cpp
@@ -1303,6 +1303,42 @@ void ModuleMap::setInferredModuleAllowedBy(Module *M, const FileEntry *ModMap) {
   InferredModuleAllowedBy[M] = ModMap;
 }
 
+std::error_code
+ModuleMap::canonicalizeModuleMapPath(SmallVectorImpl<char> &Path) {
+  StringRef Dir = llvm::sys::path::parent_path({Path.data(), Path.size()});
+
+  // Do not canonicalize within the framework; the module map parser expects
+  // Modules/ not Versions/A/Modules.
+  if (llvm::sys::path::filename(Dir) == "Modules") {
+    StringRef Parent = llvm::sys::path::parent_path(Dir);
+    if (Parent.endswith(".framework"))
+      Dir = Parent;
+  }
+
+  FileManager &FM = SourceMgr.getFileManager();
+  auto DirEntry = FM.getDirectory(Dir.empty() ? "." : Dir);
+  if (!DirEntry)
+    return DirEntry.getError();
+
+  // Canonicalize the directory.
+  StringRef CanonicalDir = FM.getCanonicalName(*DirEntry);
+  if (CanonicalDir != Dir) {
+    bool Done = llvm::sys::path::replace_path_prefix(Path, Dir, CanonicalDir);
+    (void)Done;
+    assert(Done && "Path should always start with Dir");
+  }
+
+  // In theory, the filename component should also be canonicalized if it
+  // on a case-insensitive filesystem. However, the extra canonicalization is
+  // expensive and if clang looked up the filename it will always be lowercase.
+
+  // Remove ., remove redundant separators, and switch to native separators.
+  // This is needed for separators between CanonicalDir and the filename.
+  llvm::sys::path::remove_dots(Path);
+
+  return std::error_code();
+}
+
 void ModuleMap::addAdditionalModuleMapFile(const Module *M,
                                            const FileEntry *ModuleMap) {
   AdditionalModMaps[M].insert(ModuleMap);

diff  --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
index ffb60f17e930e..f38ed7be4ddda 100644
--- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -406,15 +406,14 @@ ModuleID ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
   MD.ImplicitModulePCMPath = std::string(M->getASTFile()->getName());
   MD.IsSystem = M->IsSystem;
 
-  Optional<FileEntryRef> ModuleMap = MDC.ScanInstance.getPreprocessor()
-                                         .getHeaderSearchInfo()
-                                         .getModuleMap()
-                                         .getModuleMapFileForUniquing(M);
+  ModuleMap &ModMapInfo =
+      MDC.ScanInstance.getPreprocessor().getHeaderSearchInfo().getModuleMap();
+
+  Optional<FileEntryRef> ModuleMap = ModMapInfo.getModuleMapFileForUniquing(M);
 
   if (ModuleMap) {
-    StringRef Path = ModuleMap->getFileEntry().tryGetRealPathName();
-    if (Path.empty())
-      Path = ModuleMap->getName();
+    SmallString<128> Path = ModuleMap->getNameAsRequested();
+    ModMapInfo.canonicalizeModuleMapPath(Path);
     MD.ClangModuleMapFile = std::string(Path);
   }
 

diff  --git a/clang/test/ClangScanDeps/modules-symlink-dir.c b/clang/test/ClangScanDeps/modules-symlink-dir.c
new file mode 100644
index 0000000000000..810b88942d2aa
--- /dev/null
+++ b/clang/test/ClangScanDeps/modules-symlink-dir.c
@@ -0,0 +1,131 @@
+// Check that we canonicalize the module map path without changing the module
+// directory, which would break header lookup.
+
+// REQUIRES: shell
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json
+// RUN: ln -s module %t/symlink-to-module
+// RUN: ln -s ../actual.modulemap %t/module/module.modulemap
+// RUN: ln -s A %t/module/F.framework/Versions/Current
+// RUN: ln -s Versions/Current/Modules %t/module/F.framework/Modules
+// RUN: ln -s Versions/Current/Headers %t/module/F.framework/Headers
+
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 \
+// RUN:   -format experimental-full  -mode=preprocess-dependency-directives \
+// RUN:   -optimize-args -module-files-dir %t/build > %t/deps.json
+
+// RUN: cat %t/deps.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s
+
+// Check the module commands actually build.
+// RUN: %deps-to-rsp %t/deps.json --module-name=Mod > %t/Mod.rsp
+// RUN: %deps-to-rsp %t/deps.json --module-name=F > %t/F.rsp
+// RUN: %clang @%t/Mod.rsp
+// RUN: %clang @%t/F.rsp
+
+// CHECK:      "modules": [
+// CHECK:        {
+// CHECK:          "clang-module-deps": [],
+// CHECK:          "clang-modulemap-file": "[[PREFIX]]/module/F.framework/Modules/module.modulemap"
+// CHECK:          "command-line": [
+// CHECK-NOT: symlink-to-module
+// CHECK:            "[[PREFIX]]/module/F.framework/Modules/module.modulemap"
+// CHECK-NOT: symlink-to-module
+// CHECK:          ]
+// CHECK:          "context-hash": "[[F_CONTEXT_HASH:[A-Z0-9]+]]"
+// CHECK:          "name": "F"
+// CHECK-NEXT:   }
+// CHECK-NEXT:   {
+// CHECK:          "clang-modulemap-file": "[[PREFIX]]/module/module.modulemap"
+// CHECK:          "command-line": [
+// CHECK-NOT: symlink-to-module
+// CHECK:            "[[PREFIX]]/module/module.modulemap"
+// CHECK-NOT: symlink-to-module
+// CHECK:          ]
+// CHECK:          "context-hash": "[[CONTEXT_HASH:[A-Z0-9]+]]"
+// CHECK:          "name": "Mod"
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
+// CHECK:      "translation-units": [
+// CHECK:              "clang-module-deps": [
+// CHECK:                {
+// CHECK:                  "context-hash": "[[CONTEXT_HASH]]"
+// CHECK:                  "module-name": "Mod"
+// CHECK:                }
+// CHECK-NEXT:         ],
+// CHECK:              "command-line": [
+// CHECK:                "-fmodule-map-file=[[PREFIX]]/module/module.modulemap"
+// CHECK:              ]
+// CHECK:              "clang-module-deps": [
+// CHECK:                {
+// CHECK:                  "context-hash": "[[CONTEXT_HASH]]"
+// CHECK:                  "module-name": "Mod"
+// CHECK:                }
+// CHECK-NEXT:         ]
+// CHECK:              "command-line": [
+// CHECK:                "-fmodule-map-file=[[PREFIX]]/module/module.modulemap"
+// CHECK:              ]
+// CHECK:              "clang-module-deps": [
+// CHECK:                {
+// CHECK:                  "context-hash": "[[F_CONTEXT_HASH]]"
+// CHECK:                  "module-name": "F"
+// CHECK:                }
+// CHECK-NEXT:         ]
+// CHECK:              "command-line": [
+// CHECK:                "-fmodule-map-file=[[PREFIX]]/module/F.framework/Modules/module.modulemap"
+// CHECK:              ]
+// CHECK:              "clang-module-deps": [
+// CHECK:                {
+// CHECK:                  "context-hash": "[[F_CONTEXT_HASH]]"
+// CHECK:                  "module-name": "F"
+// CHECK:                }
+// CHECK-NEXT:         ]
+// CHECK:              "command-line": [
+// CHECK:                "-fmodule-map-file=[[PREFIX]]/module/F.framework/Modules/module.modulemap"
+// CHECK:              ]
+
+//--- cdb.json.in
+[
+  {
+    "directory": "DIR",
+    "command": "clang -fsyntax-only DIR/tu1.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
+    "file": "DIR/tu1.c"
+  },
+  {
+    "directory": "DIR",
+    "command": "clang -fsyntax-only DIR/tu2.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
+    "file": "DIR/tu2.c"
+  },
+  {
+    "directory": "DIR",
+    "command": "clang -fsyntax-only -F DIR/symlink-to-module DIR/tu3.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
+    "file": "DIR/tu3.c"
+  },
+  {
+    "directory": "DIR",
+    "command": "clang -fsyntax-only -F DIR/module DIR/tu3.c -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache",
+    "file": "DIR/tu3.c"
+  },
+]
+
+//--- actual.modulemap
+module Mod { header "header.h" }
+
+//--- module/header.h
+
+//--- tu1.c
+#include "symlink-to-module/header.h"
+
+//--- tu2.c
+#include "module/header.h"
+
+//--- module/F.framework/Versions/A/Modules/module.modulemap
+framework module F {
+  umbrella header "F.h"
+}
+
+//--- module/F.framework/Versions/A/Headers/F.h
+
+//--- tu3.c
+#include "F/F.h"


        


More information about the cfe-commits mailing list