[clang-tools-extra] 8fe8d69 - [clang][deps] Make clang-scan-deps write modules in raw format

Ben Langmuir via cfe-commits cfe-commits at lists.llvm.org
Wed May 3 12:12:55 PDT 2023


Author: Ben Langmuir
Date: 2023-05-03T12:07:46-07:00
New Revision: 8fe8d69ddf881db70cb8df31e614a06e633e2c5f

URL: https://github.com/llvm/llvm-project/commit/8fe8d69ddf881db70cb8df31e614a06e633e2c5f
DIFF: https://github.com/llvm/llvm-project/commit/8fe8d69ddf881db70cb8df31e614a06e633e2c5f.diff

LOG: [clang][deps] Make clang-scan-deps write modules in raw format

We have no use for debug info for the scanner modules, and writing raw
ast files speeds up scanning ~15% in some cases. Note that the compile
commands produced by the scanner will still build the obj format (if
requested), and the scanner can *read* obj format pcms, e.g. from a PCH.

rdar://108807592

Differential Revision: https://reviews.llvm.org/D149693

Added: 
    clang/test/ClangScanDeps/module-format.c

Modified: 
    clang-tools-extra/clangd/Compiler.cpp
    clang/include/clang/CodeGen/ObjectFilePCHContainerOperations.h
    clang/include/clang/Serialization/PCHContainerOperations.h
    clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
    clang/lib/Frontend/ASTUnit.cpp
    clang/lib/Serialization/PCHContainerOperations.cpp
    clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
    clang/tools/libclang/CIndex.cpp
    clang/tools/libclang/Indexing.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/Compiler.cpp b/clang-tools-extra/clangd/Compiler.cpp
index f242db44b4317..7b93353cab1f3 100644
--- a/clang-tools-extra/clangd/Compiler.cpp
+++ b/clang-tools-extra/clangd/Compiler.cpp
@@ -73,7 +73,7 @@ void disableUnsupportedOptions(CompilerInvocation &CI) {
   // Always default to raw container format as clangd doesn't registry any other
   // and clang dies when faced with unknown formats.
   CI.getHeaderSearchOpts().ModuleFormat =
-      PCHContainerOperations().getRawReader().getFormat().str();
+      PCHContainerOperations().getRawReader().getFormats().front().str();
 
   CI.getFrontendOpts().Plugins.clear();
   CI.getFrontendOpts().AddPluginActions.clear();

diff  --git a/clang/include/clang/CodeGen/ObjectFilePCHContainerOperations.h b/clang/include/clang/CodeGen/ObjectFilePCHContainerOperations.h
index c13e052149d95..7a02d8725885a 100644
--- a/clang/include/clang/CodeGen/ObjectFilePCHContainerOperations.h
+++ b/clang/include/clang/CodeGen/ObjectFilePCHContainerOperations.h
@@ -32,7 +32,7 @@ class ObjectFilePCHContainerWriter : public PCHContainerWriter {
 /// A PCHContainerReader implementation that uses LLVM to
 /// wraps Clang modules inside a COFF, ELF, or Mach-O container.
 class ObjectFilePCHContainerReader : public PCHContainerReader {
-  StringRef getFormat() const override { return "obj"; }
+  ArrayRef<StringRef> getFormats() const override;
 
   /// Returns the serialized AST inside the PCH container Buffer.
   StringRef ExtractPCH(llvm::MemoryBufferRef Buffer) const override;

diff  --git a/clang/include/clang/Serialization/PCHContainerOperations.h b/clang/include/clang/Serialization/PCHContainerOperations.h
index 9f9700a418a9c..be10feb5e351c 100644
--- a/clang/include/clang/Serialization/PCHContainerOperations.h
+++ b/clang/include/clang/Serialization/PCHContainerOperations.h
@@ -56,7 +56,7 @@ class PCHContainerReader {
 public:
   virtual ~PCHContainerReader() = 0;
   /// Equivalent to the format passed to -fmodule-format=
-  virtual llvm::StringRef getFormat() const = 0;
+  virtual llvm::ArrayRef<llvm::StringRef> getFormats() const = 0;
 
   /// Returns the serialized AST inside the PCH container Buffer.
   virtual llvm::StringRef ExtractPCH(llvm::MemoryBufferRef Buffer) const = 0;
@@ -78,8 +78,7 @@ class RawPCHContainerWriter : public PCHContainerWriter {
 
 /// Implements read operations for a raw pass-through PCH container.
 class RawPCHContainerReader : public PCHContainerReader {
-  llvm::StringRef getFormat() const override { return "raw"; }
-
+  llvm::ArrayRef<llvm::StringRef> getFormats() const override;
   /// Simply returns the buffer contained in Buffer.
   llvm::StringRef ExtractPCH(llvm::MemoryBufferRef Buffer) const override;
 };
@@ -87,7 +86,9 @@ class RawPCHContainerReader : public PCHContainerReader {
 /// A registry of PCHContainerWriter and -Reader objects for 
diff erent formats.
 class PCHContainerOperations {
   llvm::StringMap<std::unique_ptr<PCHContainerWriter>> Writers;
-  llvm::StringMap<std::unique_ptr<PCHContainerReader>> Readers;
+  llvm::StringMap<PCHContainerReader *> Readers;
+  llvm::SmallVector<std::unique_ptr<PCHContainerReader>> OwnedReaders;
+
 public:
   /// Automatically registers a RawPCHContainerWriter and
   /// RawPCHContainerReader.
@@ -96,13 +97,17 @@ class PCHContainerOperations {
     Writers[Writer->getFormat()] = std::move(Writer);
   }
   void registerReader(std::unique_ptr<PCHContainerReader> Reader) {
-    Readers[Reader->getFormat()] = std::move(Reader);
+    assert(!Reader->getFormats().empty() &&
+           "PCHContainerReader must handle >=1 format");
+    for (llvm::StringRef Fmt : Reader->getFormats())
+      Readers[Fmt] = Reader.get();
+    OwnedReaders.push_back(std::move(Reader));
   }
   const PCHContainerWriter *getWriterOrNull(llvm::StringRef Format) {
     return Writers[Format].get();
   }
   const PCHContainerReader *getReaderOrNull(llvm::StringRef Format) {
-    return Readers[Format].get();
+    return Readers[Format];
   }
   const PCHContainerReader &getRawReader() {
     return *getReaderOrNull("raw");

diff  --git a/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp b/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
index 07db4a63e2c10..114a9c1e2eac1 100644
--- a/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
+++ b/clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp
@@ -349,6 +349,11 @@ ObjectFilePCHContainerWriter::CreatePCHContainerGenerator(
       CI, MainFileName, OutputFileName, std::move(OS), Buffer);
 }
 
+ArrayRef<StringRef> ObjectFilePCHContainerReader::getFormats() const {
+  static StringRef Formats[] = {"obj", "raw"};
+  return Formats;
+}
+
 StringRef
 ObjectFilePCHContainerReader::ExtractPCH(llvm::MemoryBufferRef Buffer) const {
   StringRef PCH;

diff  --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp
index 3269c9d0f479f..bda49fb68e460 100644
--- a/clang/lib/Frontend/ASTUnit.cpp
+++ b/clang/lib/Frontend/ASTUnit.cpp
@@ -811,7 +811,7 @@ std::unique_ptr<ASTUnit> ASTUnit::LoadFromASTFile(
                                      UserFilesAreVolatile);
   AST->ModuleCache = new InMemoryModuleCache;
   AST->HSOpts = std::make_shared<HeaderSearchOptions>();
-  AST->HSOpts->ModuleFormat = std::string(PCHContainerRdr.getFormat());
+  AST->HSOpts->ModuleFormat = std::string(PCHContainerRdr.getFormats().front());
   AST->HeaderInfo.reset(new HeaderSearch(AST->HSOpts,
                                          AST->getSourceManager(),
                                          AST->getDiagnostics(),

diff  --git a/clang/lib/Serialization/PCHContainerOperations.cpp b/clang/lib/Serialization/PCHContainerOperations.cpp
index d4990fce2d99d..56ca3394385b4 100644
--- a/clang/lib/Serialization/PCHContainerOperations.cpp
+++ b/clang/lib/Serialization/PCHContainerOperations.cpp
@@ -57,6 +57,11 @@ std::unique_ptr<ASTConsumer> RawPCHContainerWriter::CreatePCHContainerGenerator(
   return std::make_unique<RawPCHContainerGenerator>(std::move(OS), Buffer);
 }
 
+ArrayRef<llvm::StringRef> RawPCHContainerReader::getFormats() const {
+  static StringRef Raw("raw");
+  return ArrayRef(Raw);
+}
+
 StringRef
 RawPCHContainerReader::ExtractPCH(llvm::MemoryBufferRef Buffer) const {
   return Buffer.getBuffer();

diff  --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index 937f39ba3bffa..baf76f01cfb54 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -181,6 +181,7 @@ class DependencyScanningAction : public tooling::ToolAction {
     ScanInstance.getFrontendOpts().GenerateGlobalModuleIndex = false;
     ScanInstance.getFrontendOpts().UseGlobalModuleIndex = false;
     ScanInstance.getFrontendOpts().ModulesShareFileManager = false;
+    ScanInstance.getHeaderSearchOpts().ModuleFormat = "raw";
 
     ScanInstance.setFileManager(FileMgr);
     // Support for virtual file system overlays.
@@ -309,12 +310,11 @@ DependencyScanningWorker::DependencyScanningWorker(
     : Format(Service.getFormat()), OptimizeArgs(Service.canOptimizeArgs()),
       EagerLoadModules(Service.shouldEagerLoadModules()) {
   PCHContainerOps = std::make_shared<PCHContainerOperations>();
+  // We need to read object files from PCH built outside the scanner.
   PCHContainerOps->registerReader(
       std::make_unique<ObjectFilePCHContainerReader>());
-  // We don't need to write object files, but the current PCH implementation
-  // requires the writer to be registered as well.
-  PCHContainerOps->registerWriter(
-      std::make_unique<ObjectFilePCHContainerWriter>());
+  // The scanner itself writes only raw ast files.
+  PCHContainerOps->registerWriter(std::make_unique<RawPCHContainerWriter>());
 
   switch (Service.getMode()) {
   case ScanningMode::DependencyDirectivesScan:

diff  --git a/clang/test/ClangScanDeps/module-format.c b/clang/test/ClangScanDeps/module-format.c
new file mode 100644
index 0000000000000..001a011ae0b59
--- /dev/null
+++ b/clang/test/ClangScanDeps/module-format.c
@@ -0,0 +1,64 @@
+// Check that the scanner produces raw ast files, even when builds produce the
+// obj format, and that the scanner can read obj format from PCH and modules
+// imported by PCH.
+
+// Unsupported on AIX because we don't support the requisite "__clangast"
+// section in XCOFF yet.
+// UNSUPPORTED: target={{.*}}-aix{{.*}}
+
+// REQUIRES: shell
+
+// RUN: rm -rf %t && mkdir %t
+// RUN: cp %S/Inputs/modules-pch/* %t
+
+// Scan dependencies of the PCH:
+//
+// RUN: rm -f %t/cdb_pch.json
+// RUN: sed "s|DIR|%/t|g" %S/Inputs/modules-pch/cdb_pch.json > %t/cdb_pch.json
+// RUN: clang-scan-deps -compilation-database %t/cdb_pch.json -format experimental-full \
+// RUN:   -module-files-dir %t/build > %t/result_pch.json
+
+// Explicitly build the PCH:
+//
+// RUN: %deps-to-rsp %t/result_pch.json --module-name=ModCommon1 > %t/mod_common_1.cc1.rsp
+// RUN: %deps-to-rsp %t/result_pch.json --module-name=ModCommon2 > %t/mod_common_2.cc1.rsp
+// RUN: %deps-to-rsp %t/result_pch.json --module-name=ModPCH > %t/mod_pch.cc1.rsp
+// RUN: %deps-to-rsp %t/result_pch.json --tu-index=0 > %t/pch.rsp
+//
+// RUN: %clang @%t/mod_common_1.cc1.rsp
+// RUN: %clang @%t/mod_common_2.cc1.rsp
+// RUN: %clang @%t/mod_pch.cc1.rsp
+// RUN: %clang @%t/pch.rsp
+
+// Scan dependencies of the TU:
+//
+// RUN: rm -f %t/cdb_tu.json
+// RUN: sed "s|DIR|%/t|g" %S/Inputs/modules-pch/cdb_tu.json > %t/cdb_tu.json
+// RUN: clang-scan-deps -compilation-database %t/cdb_tu.json -format experimental-full \
+// RUN:   -module-files-dir %t/build > %t/result_tu.json
+
+// Explicitly build the TU:
+//
+// RUN: %deps-to-rsp %t/result_tu.json --module-name=ModTU > %t/mod_tu.cc1.rsp
+// RUN: %deps-to-rsp %t/result_tu.json --tu-index=0 > %t/tu.rsp
+//
+// RUN: %clang @%t/mod_tu.cc1.rsp
+// RUN: %clang @%t/tu.rsp
+
+// Check the module format for scanner modules:
+//
+// RUN: find %t/cache -name "*.pcm" -exec %clang_cc1 -module-file-info "{}" ";" | FileCheck %s -check-prefix=SCAN
+// SCAN: Module format: raw
+// SCAN: Module format: raw
+// SCAN: Module format: raw
+// SCAN: Module format: raw
+
+// Check the module format for built modules:
+//
+// RUN: find %t/build -name "*.pcm" -exec %clang_cc1 -module-file-info "{}" ";" | FileCheck %s -check-prefix=BUILD
+// BUILD: Module format: obj
+// BUILD: Module format: obj
+// BUILD: Module format: obj
+// BUILD: Module format: obj
+
+// FIXME: check pch format as well; -module-file-info does not work with a PCH

diff  --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index bb536112e2094..448681bfe533a 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -3964,7 +3964,7 @@ clang_parseTranslationUnit_Impl(CXIndex CIdx, const char *source_filename,
       TUKind, CacheCodeCompletionResults, IncludeBriefCommentsInCodeCompletion,
       /*AllowPCHWithCompilerErrors=*/true, SkipFunctionBodies, SingleFileParse,
       /*UserFilesAreVolatile=*/true, ForSerialization, RetainExcludedCB,
-      CXXIdx->getPCHContainerOperations()->getRawReader().getFormat(),
+      CXXIdx->getPCHContainerOperations()->getRawReader().getFormats().front(),
       &ErrUnit));
 
   // Early failures in LoadFromCommandLine may return with ErrUnit unset.

diff  --git a/clang/tools/libclang/Indexing.cpp b/clang/tools/libclang/Indexing.cpp
index 24ca98d4de416..b6dfb1f47b598 100644
--- a/clang/tools/libclang/Indexing.cpp
+++ b/clang/tools/libclang/Indexing.cpp
@@ -552,7 +552,7 @@ static CXErrorCode clang_indexSourceFile_Impl(
 
   // Make sure to use the raw module format.
   CInvok->getHeaderSearchOpts().ModuleFormat = std::string(
-      CXXIdx->getPCHContainerOperations()->getRawReader().getFormat());
+      CXXIdx->getPCHContainerOperations()->getRawReader().getFormats().front());
 
   auto Unit = ASTUnit::create(CInvok, Diags, CaptureDiagnostics,
                               /*UserFilesAreVolatile=*/true);


        


More information about the cfe-commits mailing list