[clang] 766a08d - [Frontend] Only compile modules if not already finalized
    Volodymyr Sapsai via cfe-commits 
    cfe-commits at lists.llvm.org
       
    Thu Jul 15 18:28:02 PDT 2021
    
    
  
Author: Ben Barham
Date: 2021-07-15T18:27:08-07:00
New Revision: 766a08df12c111b15ed51d0fcac06042d2f68cd6
URL: https://github.com/llvm/llvm-project/commit/766a08df12c111b15ed51d0fcac06042d2f68cd6
DIFF: https://github.com/llvm/llvm-project/commit/766a08df12c111b15ed51d0fcac06042d2f68cd6.diff
LOG: [Frontend] Only compile modules if not already finalized
It was possible to re-add a module to a shared in-memory module cache
when search paths are changed. This can eventually cause a crash if the
original module is referenced after this occurs.
  1. Module A depends on B
  2. B exists in two paths C and D
  3. First run only has C on the search path, finds A and B and loads
     them
  4. Second run adds D to the front of the search path. A is loaded and
     contains a reference to the already compiled module from C. But
     searching finds the module from D instead, causing a mismatch
  5. B and the modules that depend on it are considered out of date and
     thus rebuilt
  6. The recompiled module A is added to the in-memory cache, freeing
     the previously inserted one
This can never occur from a regular clang process, but is very easy to
do through the API - whether through the use of a shared case or just
running multiple compilations from a single `CompilerInstance`. Update
the compilation to return early if a module is already finalized so that
the pre-condition in the in-memory module cache holds.
Resolves rdar://78180255
Differential Revision: https://reviews.llvm.org/D105328
Added: 
    clang/unittests/Serialization/ModuleCacheTest.cpp
Modified: 
    clang/include/clang/Basic/DiagnosticCommonKinds.td
    clang/include/clang/Basic/DiagnosticSerializationKinds.td
    clang/include/clang/Serialization/ASTReader.h
    clang/lib/Frontend/CompilerInstance.cpp
    clang/lib/Serialization/ASTReader.cpp
    clang/unittests/Serialization/CMakeLists.txt
Removed: 
    
################################################################################
diff  --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index eea5d8eaa10a1..4dff3379ed35b 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -111,6 +111,8 @@ def err_module_cycle : Error<"cyclic dependency in module '%0': %1">,
   DefaultFatal;
 def err_module_prebuilt : Error<
   "error in loading module '%0' from prebuilt module path">, DefaultFatal;
+def err_module_rebuild_finalized : Error<
+  "cannot rebuild module '%0' as it is already finalized">, DefaultFatal;
 def note_pragma_entered_here : Note<"#pragma entered here">;
 def note_decl_hiding_tag_type : Note<
   "%1 %0 is hidden by a non-type declaration of %0 here">;
diff  --git a/clang/include/clang/Basic/DiagnosticSerializationKinds.td b/clang/include/clang/Basic/DiagnosticSerializationKinds.td
index ce48833a87030..bf3221be004de 100644
--- a/clang/include/clang/Basic/DiagnosticSerializationKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSerializationKinds.td
@@ -69,6 +69,9 @@ def err_module_file_not_module : Error<
   "AST file '%0' was not built as a module">, DefaultFatal;
 def err_module_file_missing_top_level_submodule : Error<
   "module file '%0' is missing its top-level submodule">, DefaultFatal;
+def note_module_file_conflict : Note<
+  "this is generally caused by modules with the same name found in multiple "
+  "paths">;
 
 def remark_module_import : Remark<
   "importing module '%0'%select{| into '%3'}2 from '%1'">,
diff  --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index 6932d9c86d0cc..4819a5aecefaf 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -1401,6 +1401,9 @@ class ASTReader
   llvm::iterator_range<PreprocessingRecord::iterator>
   getModulePreprocessedEntities(ModuleFile &Mod) const;
 
+  bool canRecoverFromOutOfDate(StringRef ModuleFileName,
+                               unsigned ClientLoadCapabilities);
+
 public:
   class ModuleDeclIterator
       : public llvm::iterator_adaptor_base<
diff  --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index a6e2329c8864a..c642af1849bc4 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -1054,6 +1054,15 @@ compileModuleImpl(CompilerInstance &ImportingInstance, SourceLocation ImportLoc,
                       [](CompilerInstance &) {}) {
   llvm::TimeTraceScope TimeScope("Module Compile", ModuleName);
 
+  // Never compile a module that's already finalized - this would cause the
+  // existing module to be freed, causing crashes if it is later referenced
+  if (ImportingInstance.getModuleCache().isPCMFinal(ModuleFileName)) {
+    ImportingInstance.getDiagnostics().Report(
+        ImportLoc, diag::err_module_rebuild_finalized)
+        << ModuleName;
+    return false;
+  }
+
   // Construct a compiler invocation for creating this module.
   auto Invocation =
       std::make_shared<CompilerInvocation>(ImportingInstance.getInvocation());
diff  --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 98972af5957ec..faf6f3dc5d953 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -2764,7 +2764,7 @@ ASTReader::ReadControlBlock(ModuleFile &F,
         // If requested by the caller and the module hasn't already been read
         // or compiled, mark modules on error as out-of-date.
         if ((ClientLoadCapabilities & ARR_TreatModuleWithErrorsAsOutOfDate) &&
-            !ModuleMgr.getModuleCache().isPCMFinal(F.FileName))
+            canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities))
           return OutOfDate;
 
         if (!AllowASTWithCompilerErrors) {
@@ -2850,9 +2850,14 @@ ASTReader::ReadControlBlock(ModuleFile &F,
                                   StoredSignature, Capabilities);
 
         // If we diagnosed a problem, produce a backtrace.
-        if (isDiagnosedResult(Result, Capabilities))
+        bool recompilingFinalized =
+            Result == OutOfDate && (Capabilities & ARR_OutOfDate) &&
+            getModuleManager().getModuleCache().isPCMFinal(F.FileName);
+        if (isDiagnosedResult(Result, Capabilities) || recompilingFinalized)
           Diag(diag::note_module_file_imported_by)
               << F.FileName << !F.ModuleName.empty() << F.ModuleName;
+        if (recompilingFinalized)
+          Diag(diag::note_module_file_conflict);
 
         switch (Result) {
         case Failure: return Failure;
@@ -2918,7 +2923,7 @@ ASTReader::ReadControlBlock(ModuleFile &F,
             F.Kind != MK_ExplicitModule && F.Kind != MK_PrebuiltModule) {
           auto BuildDir = PP.getFileManager().getDirectory(Blob);
           if (!BuildDir || *BuildDir != M->Directory) {
-            if ((ClientLoadCapabilities & ARR_OutOfDate) == 0)
+            if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities))
               Diag(diag::err_imported_module_relocated)
                   << F.ModuleName << Blob << M->Directory->getName();
             return OutOfDate;
@@ -3928,7 +3933,7 @@ ASTReader::ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
     if (!bool(PP.getPreprocessorOpts().DisablePCHOrModuleValidation &
               DisableValidationForModuleKind::Module) &&
         !ModMap) {
-      if ((ClientLoadCapabilities & ARR_OutOfDate) == 0) {
+      if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities)) {
         if (auto ASTFE = M ? M->getASTFile() : None) {
           // This module was defined by an imported (explicit) module.
           Diag(diag::err_module_file_conflict) << F.ModuleName << F.FileName
@@ -3959,7 +3964,7 @@ ASTReader::ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
       assert((ImportedBy || F.Kind == MK_ImplicitModule) &&
              "top-level import should be verified");
       bool NotImported = F.Kind == MK_ImplicitModule && !ImportedBy;
-      if ((ClientLoadCapabilities & ARR_OutOfDate) == 0)
+      if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities))
         Diag(diag::err_imported_module_modmap_changed)
             << F.ModuleName << (NotImported ? F.FileName : ImportedBy->FileName)
             << ModMap->getName() << F.ModuleMapPath << NotImported;
@@ -3970,13 +3975,13 @@ ASTReader::ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
     for (unsigned I = 0, N = Record[Idx++]; I < N; ++I) {
       // FIXME: we should use input files rather than storing names.
       std::string Filename = ReadPath(F, Record, Idx);
-      auto F = FileMgr.getFile(Filename, false, false);
-      if (!F) {
-        if ((ClientLoadCapabilities & ARR_OutOfDate) == 0)
+      auto SF = FileMgr.getFile(Filename, false, false);
+      if (!SF) {
+        if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities))
           Error("could not find file '" + Filename +"' referenced by AST file");
         return OutOfDate;
       }
-      AdditionalStoredMaps.insert(*F);
+      AdditionalStoredMaps.insert(*SF);
     }
 
     // Check any additional module map files (e.g. module.private.modulemap)
@@ -3986,7 +3991,7 @@ ASTReader::ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
         // Remove files that match
         // Note: SmallPtrSet::erase is really remove
         if (!AdditionalStoredMaps.erase(ModMap)) {
-          if ((ClientLoadCapabilities & ARR_OutOfDate) == 0)
+          if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities))
             Diag(diag::err_module_
diff erent_modmap)
               << F.ModuleName << /*new*/0 << ModMap->getName();
           return OutOfDate;
@@ -3997,7 +4002,7 @@ ASTReader::ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
     // Check any additional module map files that are in the pcm, but not
     // found in header search. Cases that match are already removed.
     for (const FileEntry *ModMap : AdditionalStoredMaps) {
-      if ((ClientLoadCapabilities & ARR_OutOfDate) == 0)
+      if (!canRecoverFromOutOfDate(F.FileName, ClientLoadCapabilities))
         Diag(diag::err_module_
diff erent_modmap)
           << F.ModuleName << /*not new*/1 << ModMap->getName();
       return OutOfDate;
@@ -5924,6 +5929,12 @@ ASTReader::getModulePreprocessedEntities(ModuleFile &Mod) const {
                           PreprocessingRecord::iterator());
 }
 
+bool ASTReader::canRecoverFromOutOfDate(StringRef ModuleFileName,
+                                        unsigned int ClientLoadCapabilities) {
+  return ClientLoadCapabilities & ARR_OutOfDate &&
+         !getModuleManager().getModuleCache().isPCMFinal(ModuleFileName);
+}
+
 llvm::iterator_range<ASTReader::ModuleDeclIterator>
 ASTReader::getModuleFileLevelDecls(ModuleFile &Mod) {
   return llvm::make_range(
diff  --git a/clang/unittests/Serialization/CMakeLists.txt b/clang/unittests/Serialization/CMakeLists.txt
index f143f28e12326..c0a79842525b2 100644
--- a/clang/unittests/Serialization/CMakeLists.txt
+++ b/clang/unittests/Serialization/CMakeLists.txt
@@ -6,12 +6,14 @@ set(LLVM_LINK_COMPONENTS
 
 add_clang_unittest(SerializationTests
   InMemoryModuleCacheTest.cpp
+  ModuleCacheTest.cpp
   )
 
 clang_target_link_libraries(SerializationTests
   PRIVATE
   clangAST
   clangBasic
+  clangFrontend
   clangLex
   clangSema
   clangSerialization
diff  --git a/clang/unittests/Serialization/ModuleCacheTest.cpp b/clang/unittests/Serialization/ModuleCacheTest.cpp
new file mode 100644
index 0000000000000..6cffbc2c12218
--- /dev/null
+++ b/clang/unittests/Serialization/ModuleCacheTest.cpp
@@ -0,0 +1,179 @@
+//===- unittests/Serialization/ModuleCacheTest.cpp - CI tests -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/FileManager.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/CompilerInvocation.h"
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace clang;
+
+namespace {
+
+class ModuleCacheTest : public ::testing::Test {
+  void SetUp() override {
+    ASSERT_FALSE(sys::fs::createUniqueDirectory("modulecache-test", TestDir));
+
+    ModuleCachePath = SmallString<256>(TestDir);
+    sys::path::append(ModuleCachePath, "mcp");
+    ASSERT_FALSE(sys::fs::create_directories(ModuleCachePath));
+  }
+
+  void TearDown() override { sys::fs::remove_directories(TestDir); }
+
+public:
+  SmallString<256> TestDir;
+  SmallString<256> ModuleCachePath;
+
+  void addFile(StringRef Path, StringRef Contents) {
+    ASSERT_FALSE(sys::path::is_absolute(Path));
+
+    SmallString<256> AbsPath(TestDir);
+    sys::path::append(AbsPath, Path);
+
+    std::error_code EC;
+    ASSERT_FALSE(
+        sys::fs::create_directories(llvm::sys::path::parent_path(AbsPath)));
+    llvm::raw_fd_ostream OS(AbsPath, EC);
+    ASSERT_FALSE(EC);
+    OS << Contents;
+  }
+
+  void addDuplicateFrameworks() {
+    addFile("test.m", R"cpp(
+        @import Top;
+    )cpp");
+
+    addFile("frameworks/Top.framework/Headers/top.h", R"cpp(
+        @import M;
+    )cpp");
+    addFile("frameworks/Top.framework/Modules/module.modulemap", R"cpp(
+        framework module Top [system] {
+          header "top.h"
+          export *
+        }
+    )cpp");
+
+    addFile("frameworks/M.framework/Headers/m.h", R"cpp(
+        void foo();
+    )cpp");
+    addFile("frameworks/M.framework/Modules/module.modulemap", R"cpp(
+        framework module M [system] {
+          header "m.h"
+          export *
+        }
+    )cpp");
+
+    addFile("frameworks2/M.framework/Headers/m.h", R"cpp(
+        void foo();
+    )cpp");
+    addFile("frameworks2/M.framework/Modules/module.modulemap", R"cpp(
+        framework module M [system] {
+          header "m.h"
+          export *
+        }
+    )cpp");
+  }
+};
+
+TEST_F(ModuleCacheTest, CachedModuleNewPath) {
+  addDuplicateFrameworks();
+
+  SmallString<256> MCPArg("-fmodules-cache-path=");
+  MCPArg.append(ModuleCachePath);
+  IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
+      CompilerInstance::createDiagnostics(new DiagnosticOptions());
+
+  // First run should pass with no errors
+  const char *Args[] = {"clang",        "-fmodules",          "-Fframeworks",
+                        MCPArg.c_str(), "-working-directory", TestDir.c_str(),
+                        "test.m"};
+  std::shared_ptr<CompilerInvocation> Invocation =
+      createInvocationFromCommandLine(Args, Diags);
+  ASSERT_TRUE(Invocation);
+  CompilerInstance Instance;
+  Instance.setDiagnostics(Diags.get());
+  Instance.setInvocation(Invocation);
+  SyntaxOnlyAction Action;
+  ASSERT_TRUE(Instance.ExecuteAction(Action));
+  ASSERT_FALSE(Diags->hasErrorOccurred());
+
+  // Now add `frameworks2` to the search path. `Top.pcm` will have a reference
+  // to the `M` from `frameworks`, but a search will find the `M` from
+  // `frameworks2` - causing a mismatch and it to be considered out of date.
+  //
+  // Normally this would be fine - `M` and the modules it depends on would be
+  // rebuilt. However, since we have a shared module cache and thus an already
+  // finalized `Top`, recompiling `Top` will cause the existing module to be
+  // removed from the cache, causing possible crashed if it is ever used.
+  //
+  // Make sure that an error occurs instead.
+  const char *Args2[] = {"clang",         "-fmodules",    "-Fframeworks2",
+                         "-Fframeworks",  MCPArg.c_str(), "-working-directory",
+                         TestDir.c_str(), "test.m"};
+  std::shared_ptr<CompilerInvocation> Invocation2 =
+      createInvocationFromCommandLine(Args2, Diags);
+  ASSERT_TRUE(Invocation2);
+  CompilerInstance Instance2(Instance.getPCHContainerOperations(),
+                             &Instance.getModuleCache());
+  Instance2.setDiagnostics(Diags.get());
+  Instance2.setInvocation(Invocation2);
+  SyntaxOnlyAction Action2;
+  ASSERT_FALSE(Instance2.ExecuteAction(Action2));
+  ASSERT_TRUE(Diags->hasErrorOccurred());
+}
+
+TEST_F(ModuleCacheTest, CachedModuleNewPathAllowErrors) {
+  addDuplicateFrameworks();
+
+  SmallString<256> MCPArg("-fmodules-cache-path=");
+  MCPArg.append(ModuleCachePath);
+  IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
+      CompilerInstance::createDiagnostics(new DiagnosticOptions());
+
+  // First run should pass with no errors
+  const char *Args[] = {"clang",        "-fmodules",          "-Fframeworks",
+                        MCPArg.c_str(), "-working-directory", TestDir.c_str(),
+                        "test.m"};
+  std::shared_ptr<CompilerInvocation> Invocation =
+      createInvocationFromCommandLine(Args, Diags);
+  ASSERT_TRUE(Invocation);
+  CompilerInstance Instance;
+  Instance.setDiagnostics(Diags.get());
+  Instance.setInvocation(Invocation);
+  SyntaxOnlyAction Action;
+  ASSERT_TRUE(Instance.ExecuteAction(Action));
+  ASSERT_FALSE(Diags->hasErrorOccurred());
+
+  // Same as `CachedModuleNewPath` but while allowing errors. This is a hard
+  // failure where the module wasn't created, so it should still fail.
+  const char *Args2[] = {
+      "clang",         "-fmodules",    "-Fframeworks2",
+      "-Fframeworks",  MCPArg.c_str(), "-working-directory",
+      TestDir.c_str(), "-Xclang",      "-fallow-pcm-with-compiler-errors",
+      "test.m"};
+  std::shared_ptr<CompilerInvocation> Invocation2 =
+      createInvocationFromCommandLine(Args2, Diags);
+  ASSERT_TRUE(Invocation2);
+  CompilerInstance Instance2(Instance.getPCHContainerOperations(),
+                             &Instance.getModuleCache());
+  Instance2.setDiagnostics(Diags.get());
+  Instance2.setInvocation(Invocation2);
+  SyntaxOnlyAction Action2;
+  ASSERT_FALSE(Instance2.ExecuteAction(Action2));
+  ASSERT_TRUE(Diags->hasErrorOccurred());
+}
+
+} // anonymous namespace
        
    
    
More information about the cfe-commits
mailing list