[clang] 05ec16d - [clang][deps] Avoid leaking modulemap paths across unrelated imports

Ben Langmuir via cfe-commits cfe-commits at lists.llvm.org
Tue Nov 15 14:03:55 PST 2022


Author: Ben Langmuir
Date: 2022-11-15T13:59:26-08:00
New Revision: 05ec16d90deb747414c8534f98617d25d90bb714

URL: https://github.com/llvm/llvm-project/commit/05ec16d90deb747414c8534f98617d25d90bb714
DIFF: https://github.com/llvm/llvm-project/commit/05ec16d90deb747414c8534f98617d25d90bb714.diff

LOG: [clang][deps] Avoid leaking modulemap paths across unrelated imports

Use a FileEntryRef when retrieving modulemap paths in the scanner so
that we use a path compatible with the original module import, rather
than a FileEntry which can allow unrelated modules to leak paths into
how we build a module due to FileManager mutating the path.

Note: the current change prevents an "unrelated" path, but does not
change how VFS mapped paths are handled (which would be calling
getNameAsRequested) nor canonicalize the path.

Differential Revision: https://reviews.llvm.org/D137989

Added: 
    clang/test/ClangScanDeps/modules-symlink-dir-from-module.c

Modified: 
    clang/include/clang/Serialization/ASTReader.h
    clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
    clang/lib/Frontend/FrontendAction.cpp
    clang/lib/Serialization/ASTReader.cpp
    clang/lib/Serialization/ASTWriter.cpp
    clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index da8b9a9207a03..23c67fc8828d3 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -2338,8 +2338,7 @@ class ASTReader
   /// Visit all the top-level module maps loaded when building the given module
   /// file.
   void visitTopLevelModuleMaps(serialization::ModuleFile &MF,
-                               llvm::function_ref<
-                                   void(const FileEntry *)> Visitor);
+                               llvm::function_ref<void(FileEntryRef)> Visitor);
 
   bool isProcessingUpdateRecords() { return ProcessingUpdateRecords; }
 };

diff  --git a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
index 5062c4cc5e265..1cc6208a4c36a 100644
--- a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
+++ b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
@@ -237,7 +237,7 @@ class ModuleDepCollector final : public DependencyCollector {
       llvm::function_ref<void(CompilerInvocation &)> Optimize) const;
 
   /// Collect module map files for given modules.
-  llvm::StringSet<>
+  llvm::DenseSet<const FileEntry *>
   collectModuleMapFiles(ArrayRef<ModuleID> ClangModuleDeps) const;
 
   /// Add module map files to the invocation, if needed.

diff  --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 45f04eac818da..76ef0cb3b4051 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -638,11 +638,10 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
         if (&MF != &PrimaryModule)
           CI.getFrontendOpts().ModuleFiles.push_back(MF.FileName);
 
-      ASTReader->visitTopLevelModuleMaps(
-          PrimaryModule, [&](const FileEntry *FE) {
-            CI.getFrontendOpts().ModuleMapFiles.push_back(
-                std::string(FE->getName()));
-          });
+      ASTReader->visitTopLevelModuleMaps(PrimaryModule, [&](FileEntryRef FE) {
+        CI.getFrontendOpts().ModuleMapFiles.push_back(
+            std::string(FE.getName()));
+      });
     }
 
     // Set up the input file for replay purposes.

diff  --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 05e6c6ea7952b..cf3716201dd80 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -9164,14 +9164,14 @@ void ASTReader::visitInputFiles(serialization::ModuleFile &MF,
 
 void ASTReader::visitTopLevelModuleMaps(
     serialization::ModuleFile &MF,
-    llvm::function_ref<void(const FileEntry *FE)> Visitor) {
+    llvm::function_ref<void(FileEntryRef FE)> Visitor) {
   unsigned NumInputs = MF.InputFilesLoaded.size();
   for (unsigned I = 0; I < NumInputs; ++I) {
     InputFileInfo IFI = readInputFileInfo(MF, I + 1);
     if (IFI.TopLevelModuleMap)
       // FIXME: This unnecessarily re-reads the InputFileInfo.
       if (auto FE = getInputFile(MF, I + 1).getFile())
-        Visitor(FE);
+        Visitor(*FE);
   }
 }
 

diff  --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 1dcb5426b314d..cdd65a4461d71 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -1486,12 +1486,14 @@ namespace  {
 
 /// An input file.
 struct InputFileEntry {
-  const FileEntry *File;
+  FileEntryRef File;
   bool IsSystemFile;
   bool IsTransient;
   bool BufferOverridden;
   bool IsTopLevelModuleMap;
   uint32_t ContentHash[2];
+
+  InputFileEntry(FileEntryRef File) : File(File) {}
 };
 
 } // namespace
@@ -1541,8 +1543,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
     if (!IsSLocAffecting[I])
       continue;
 
-    InputFileEntry Entry;
-    Entry.File = Cache->OrigEntry;
+    InputFileEntry Entry(*Cache->OrigEntry);
     Entry.IsSystemFile = isSystem(File.getFileCharacteristic());
     Entry.IsTransient = Cache->IsTransient;
     Entry.BufferOverridden = Cache->BufferOverridden;
@@ -1557,9 +1558,8 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
       if (MemBuff)
         ContentHash = hash_value(MemBuff->getBuffer());
       else
-        // FIXME: The path should be taken from the FileEntryRef.
         PP->Diag(SourceLocation(), diag::err_module_unable_to_hash_content)
-            << Entry.File->getName();
+            << Entry.File.getName();
     }
     auto CH = llvm::APInt(64, ContentHash);
     Entry.ContentHash[0] =
@@ -1599,14 +1599,13 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
       RecordData::value_type Record[] = {
           INPUT_FILE,
           InputFileOffsets.size(),
-          (uint64_t)Entry.File->getSize(),
+          (uint64_t)Entry.File.getSize(),
           (uint64_t)getTimestampForOutput(Entry.File),
           Entry.BufferOverridden,
           Entry.IsTransient,
           Entry.IsTopLevelModuleMap};
 
-      // FIXME: The path should be taken from the FileEntryRef.
-      EmitRecordWithPath(IFAbbrevCode, Record, Entry.File->getName());
+      EmitRecordWithPath(IFAbbrevCode, Record, Entry.File.getName());
     }
 
     // Emit content hash for this file.

diff  --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
index f0fed7d12f3de..93720f73ce9b0 100644
--- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -130,22 +130,22 @@ ModuleDepCollector::makeInvocationForModuleBuildWithoutOutputs(
 
   auto DepModuleMapFiles = collectModuleMapFiles(Deps.ClangModuleDeps);
   for (StringRef ModuleMapFile : Deps.ModuleMapFileDeps) {
+    // TODO: Track these as `FileEntryRef` to simplify the equality check below.
+    auto ModuleMapEntry = ScanInstance.getFileManager().getFile(ModuleMapFile);
+    assert(ModuleMapEntry && "module map file entry not found");
+
     // Don't report module maps describing eagerly-loaded dependency. This
     // information will be deserialized from the PCM.
     // TODO: Verify this works fine when modulemap for module A is eagerly
     // loaded from A.pcm, and module map passed on the command line contains
     // definition of a submodule: "explicit module A.Private { ... }".
-    if (EagerLoadModules && DepModuleMapFiles.contains(ModuleMapFile))
+    if (EagerLoadModules && DepModuleMapFiles.contains(*ModuleMapEntry))
       continue;
 
-    // TODO: Track these as `FileEntryRef` to simplify the equality check below.
-    auto ModuleMapEntry = ScanInstance.getFileManager().getFile(ModuleMapFile);
-    assert(ModuleMapEntry && "module map file entry not found");
-
     // Don't report module map file of the current module unless it also
     // describes a dependency (for symmetry).
     if (*ModuleMapEntry == *CurrentModuleMapEntry &&
-        !DepModuleMapFiles.contains(ModuleMapFile))
+        !DepModuleMapFiles.contains(*ModuleMapEntry))
       continue;
 
     CI.getFrontendOpts().ModuleMapFiles.emplace_back(ModuleMapFile);
@@ -181,13 +181,16 @@ ModuleDepCollector::makeInvocationForModuleBuildWithoutOutputs(
   return CI;
 }
 
-llvm::StringSet<> ModuleDepCollector::collectModuleMapFiles(
+llvm::DenseSet<const FileEntry *> ModuleDepCollector::collectModuleMapFiles(
     ArrayRef<ModuleID> ClangModuleDeps) const {
-  llvm::StringSet<> ModuleMapFiles;
+  llvm::DenseSet<const FileEntry *> ModuleMapFiles;
   for (const ModuleID &MID : ClangModuleDeps) {
     ModuleDeps *MD = ModuleDepsByID.lookup(MID);
     assert(MD && "Inconsistent dependency info");
-    ModuleMapFiles.insert(MD->ClangModuleMapFile);
+    // TODO: Track ClangModuleMapFile as `FileEntryRef`.
+    auto FE = ScanInstance.getFileManager().getFile(MD->ClangModuleMapFile);
+    assert(FE && "Missing module map file that was previously found");
+    ModuleMapFiles.insert(*FE);
   }
   return ModuleMapFiles;
 }
@@ -447,10 +450,10 @@ ModuleID ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
   addAllAffectingClangModules(M, MD, SeenDeps);
 
   MDC.ScanInstance.getASTReader()->visitTopLevelModuleMaps(
-      *MF, [&](const FileEntry *FE) {
-        if (FE->getName().endswith("__inferred_module.map"))
+      *MF, [&](FileEntryRef FE) {
+        if (FE.getName().endswith("__inferred_module.map"))
           return;
-        MD.ModuleMapFileDeps.emplace_back(FE->getName());
+        MD.ModuleMapFileDeps.emplace_back(FE.getName());
       });
 
   CompilerInvocation CI = MDC.makeInvocationForModuleBuildWithoutOutputs(

diff  --git a/clang/test/ClangScanDeps/modules-symlink-dir-from-module.c b/clang/test/ClangScanDeps/modules-symlink-dir-from-module.c
new file mode 100644
index 0000000000000..5dde7708c2538
--- /dev/null
+++ b/clang/test/ClangScanDeps/modules-symlink-dir-from-module.c
@@ -0,0 +1,94 @@
+// Check that the path of an imported modulemap file is not influenced by
+// modules outside that module's dependency graph. Specifically, the "Foo"
+// module below does not transitively import Mod via a symlink, so it should not
+// see the symlinked path.
+
+// REQUIRES: shell
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json
+// RUN: ln -s module %t/include/symlink-to-module
+
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 \
+// RUN:   -format experimental-full  -mode=preprocess-dependency-directives \
+// RUN:   -optimize-args -module-files-dir %t/build > %t/deps.json
+
+// RUN: cat %t/deps.json | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t
+
+// CHECK: "modules": [
+// CHECK:   {
+// CHECK:     "command-line": [
+// CHECK:       "-fmodule-map-file=[[PREFIX]]/include/module/module.modulemap"
+// CHECK:     ]
+// CHECK:     "name": "Foo"
+// CHECK:   }
+// CHECK:   {
+// CHECK:     "command-line": [
+// FIXME: canonicalize this path
+// CHECK:       "-fmodule-map-file=[[PREFIX]]/include/symlink-to-module/module.modulemap"
+// CHECK:     ]
+// CHECK:     "name": "Foo_Private"
+// CHECK:   }
+// CHECK:   {
+// CHECK:     "command-line": [
+// CHECK:       "[[PREFIX]]/include/module/module.modulemap"
+// CHECK:     ]
+// CHECK:     "name": "Mod"
+// CHECK:   }
+// CHECK:   {
+// CHECK:     "command-line": [
+// FIXME: canonicalize this path
+// CHECK:       "-fmodule-map-file=[[PREFIX]]/include/symlink-to-module/module.modulemap"
+// CHECK:     ]
+// CHECK:     "name": "Other"
+// CHECK:   }
+// CHECK:   {
+// CHECK:     "command-line": [
+// FIXME: canonicalize this path
+// CHECK:       "-fmodule-map-file=[[PREFIX]]/include/symlink-to-module/module.modulemap"
+// CHECK:     ]
+// CHECK:     "name": "Test"
+// CHECK:   }
+// CHECK: ]
+
+//--- cdb.json.in
+[{
+  "directory": "DIR",
+  "command": "clang -fsyntax-only DIR/test.c -F DIR/Frameworks -I DIR/include -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/module-cache",
+  "file": "DIR/test.c"
+}]
+
+//--- include/module/module.modulemap
+module Mod { header "mod.h" export * }
+
+//--- include/module/mod.h
+
+//--- include/module.modulemap
+module Other { header "other.h" export * }
+
+//--- include/other.h
+#include "symlink-to-module/mod.h"
+#include "module/mod.h"
+
+//--- Frameworks/Foo.framework/Modules/module.modulemap
+framework module Foo { header "Foo.h" export * }
+//--- Frameworks/Foo.framework/Modules/module.private.modulemap
+framework module Foo_Private { header "Priv.h" export * }
+
+//--- Frameworks/Foo.framework/Headers/Foo.h
+#include "module/mod.h"
+
+//--- Frameworks/Foo.framework/PrivateHeaders/Priv.h
+#include <Foo/Foo.h>
+#include "other.h"
+
+//--- module.modulemap
+module Test { header "test.h" export * }
+
+//--- test.h
+#include <Foo/Priv.h>
+#include <Foo/Foo.h>
+
+//--- test.c
+#include "test.h"


        


More information about the cfe-commits mailing list