[cfe-commits] r173301 - in /cfe/trunk: include/clang/Driver/CC1Options.td include/clang/Frontend/CompilerInstance.h include/clang/Frontend/FrontendOptions.h include/clang/Serialization/GlobalModuleIndex.h lib/Frontend/CompilerInstance.cpp lib/Frontend/CompilerInvocation.cpp lib/Frontend/FrontendAction.cpp lib/Serialization/ASTReader.cpp lib/Serialization/CMakeLists.txt lib/Serialization/GlobalModuleIndex.cpp test/Modules/global_index.m

NAKAMURA Takumi geek4civic at gmail.com
Thu Jan 24 00:26:10 PST 2013


2013/1/24 Douglas Gregor <dgregor at apple.com>:
> Author: dgregor
> Date: Wed Jan 23 16:38:11 2013
> New Revision: 173301
>
> URL: http://llvm.org/viewvc/llvm-project?rev=173301&view=rev
> Log:
> Implement the writer side of the global module index.
>
> The global module index is a "global" index for all of the module
> files within a particular subdirectory in the module cache, which
> keeps track of all of the "interesting" identifiers and selectors
> known in each of the module files. One can perform a fast lookup in
> the index to determine which module files will have more information
> about entities with a particular name/selector. This information can
> help eliminate redundant lookups into module files (a serious
> performance problem) and help with creating auto-import/auto-include
> Fix-Its.
>
> The global module index is created or updated at the end of a
> translation unit that has triggered a (re)build of a module by
> scraping all of the .pcm files out of the module cache subdirectory,
> so it catches everything. As with module rebuilds, we use the file
> system's atomicity to synchronize.
>
>
> Added:
>     cfe/trunk/include/clang/Serialization/GlobalModuleIndex.h
>     cfe/trunk/lib/Serialization/GlobalModuleIndex.cpp
>     cfe/trunk/test/Modules/global_index.m
> Modified:
>     cfe/trunk/include/clang/Driver/CC1Options.td
>     cfe/trunk/include/clang/Frontend/CompilerInstance.h
>     cfe/trunk/include/clang/Frontend/FrontendOptions.h
>     cfe/trunk/lib/Frontend/CompilerInstance.cpp
>     cfe/trunk/lib/Frontend/CompilerInvocation.cpp
>     cfe/trunk/lib/Frontend/FrontendAction.cpp
>     cfe/trunk/lib/Serialization/ASTReader.cpp
>     cfe/trunk/lib/Serialization/CMakeLists.txt

> Added: cfe/trunk/lib/Serialization/GlobalModuleIndex.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/GlobalModuleIndex.cpp?rev=173301&view=auto
> ==============================================================================
> --- cfe/trunk/lib/Serialization/GlobalModuleIndex.cpp (added)
> +++ cfe/trunk/lib/Serialization/GlobalModuleIndex.cpp Wed Jan 23 16:38:11 2013
> @@ -0,0 +1,535 @@
> +//===--- GlobalModuleIndex.cpp - Global Module Index ------------*- C++ -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file implements the GlobalModuleIndex class.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "ASTReaderInternals.h"
> +#include "clang/Basic/FileManager.h"
> +#include "clang/Basic/OnDiskHashTable.h"
> +#include "clang/Serialization/ASTBitCodes.h"
> +#include "clang/Serialization/GlobalModuleIndex.h"
> +#include "llvm/ADT/DenseMap.h"
> +#include "llvm/ADT/MapVector.h"
> +#include "llvm/ADT/SmallString.h"
> +#include "llvm/ADT/StringExtras.h"
> +#include "llvm/Bitcode/BitstreamReader.h"
> +#include "llvm/Bitcode/BitstreamWriter.h"
> +#include "llvm/Support/Filesystem.h"
> +#include "llvm/Support/LockFileManager.h"
> +#include "llvm/Support/MemoryBuffer.h"
> +#include "llvm/Support/PathV2.h"
> +using namespace clang;
> +using namespace serialization;
> +
> +//----------------------------------------------------------------------------//
> +// Shared constants
> +//----------------------------------------------------------------------------//
> +namespace {
> +  enum {
> +    /// \brief The block containing the index.
> +    GLOBAL_INDEX_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID
> +  };
> +
> +  /// \brief Describes the record types in the index.
> +  enum IndexRecordTypes {
> +    /// \brief Contains version information and potentially other metadata,
> +    /// used to determine if we can read this global index file.
> +    METADATA,
> +    /// \brief Describes a module, including its file name and dependencies.
> +    MODULE,
> +    /// \brief The index for identifiers.
> +    IDENTIFIER_INDEX
> +  };
> +}
> +
> +/// \brief The name of the global index file.
> +static const char * const IndexFileName = "modules.idx";
> +
> +/// \brief The global index file version.
> +static const unsigned CurrentVersion = 1;
> +
> +//----------------------------------------------------------------------------//
> +// Global module index writer.
> +//----------------------------------------------------------------------------//
> +
> +namespace {
> +  /// \brief Provides information about a specific module file.
> +  struct ModuleFileInfo {
> +    /// \brief The numeric ID for this module file.
> +    unsigned ID;
> +
> +    /// \brief The set of modules on which this module depends. Each entry is
> +    /// a module ID.
> +    SmallVector<unsigned, 4> Dependencies;
> +  };
> +
> +  /// \brief Builder that generates the global module index file.
> +  class GlobalModuleIndexBuilder {
> +    FileManager &FileMgr;
> +
> +    /// \brief Mapping from files to module file information.
> +    typedef llvm::MapVector<const FileEntry *, ModuleFileInfo> ModuleFilesMap;
> +
> +    /// \brief Information about each of the known module files.
> +    ModuleFilesMap ModuleFiles;
> +
> +    /// \brief Mapping from identifiers to the list of module file IDs that
> +    /// consider this identifier to be interesting.
> +    typedef llvm::StringMap<SmallVector<unsigned, 2> > InterestingIdentifierMap;
> +
> +    /// \brief A mapping from all interesting identifiers to the set of module
> +    /// files in which those identifiers are considered interesting.
> +    InterestingIdentifierMap InterestingIdentifiers;
> +
> +    /// \brief Write the block-info block for the global module index file.
> +    void emitBlockInfoBlock(llvm::BitstreamWriter &Stream);
> +
> +    /// \brief Retrieve the module file information for the given file.
> +    ModuleFileInfo &getModuleFileInfo(const FileEntry *File) {
> +      llvm::MapVector<const FileEntry *, ModuleFileInfo>::iterator Known
> +        = ModuleFiles.find(File);
> +      if (Known != ModuleFiles.end())
> +        return Known->second;
> +
> +      unsigned NewID = ModuleFiles.size();
> +      ModuleFileInfo &Info = ModuleFiles[File];
> +      Info.ID = NewID;
> +      return Info;
> +    }
> +
> +  public:
> +    explicit GlobalModuleIndexBuilder(FileManager &FileMgr) : FileMgr(FileMgr){}
> +
> +    /// \brief Load the contents of the given module file into the builder.
> +    ///
> +    /// \returns true if an error occurred, false otherwise.
> +    bool loadModuleFile(const FileEntry *File);
> +
> +    /// \brief Write the index to the given bitstream.
> +    void writeIndex(llvm::BitstreamWriter &Stream);
> +  };
> +}
> +
> +static void emitBlockID(unsigned ID, const char *Name,
> +                        llvm::BitstreamWriter &Stream,
> +                        SmallVectorImpl<uint64_t> &Record) {
> +  Record.clear();
> +  Record.push_back(ID);
> +  Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETBID, Record);
> +
> +  // Emit the block name if present.
> +  if (Name == 0 || Name[0] == 0) return;
> +  Record.clear();
> +  while (*Name)
> +    Record.push_back(*Name++);
> +  Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_BLOCKNAME, Record);
> +}
> +
> +static void emitRecordID(unsigned ID, const char *Name,
> +                         llvm::BitstreamWriter &Stream,
> +                         SmallVectorImpl<uint64_t> &Record) {
> +  Record.clear();
> +  Record.push_back(ID);
> +  while (*Name)
> +    Record.push_back(*Name++);
> +  Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME, Record);
> +}
> +
> +void
> +GlobalModuleIndexBuilder::emitBlockInfoBlock(llvm::BitstreamWriter &Stream) {
> +  SmallVector<uint64_t, 64> Record;
> +  Stream.EnterSubblock(llvm::bitc::BLOCKINFO_BLOCK_ID, 3);
> +
> +#define BLOCK(X) emitBlockID(X ## _ID, #X, Stream, Record)
> +#define RECORD(X) emitRecordID(X, #X, Stream, Record)
> +  BLOCK(GLOBAL_INDEX_BLOCK);
> +  RECORD(METADATA);
> +  RECORD(MODULE);
> +  RECORD(IDENTIFIER_INDEX);
> +#undef RECORD
> +#undef BLOCK
> +
> +  Stream.ExitBlock();
> +}
> +
> +namespace clang {
> +  class InterestingASTIdentifierLookupTrait
> +    : public serialization::reader::ASTIdentifierLookupTraitBase {
> +
> +  public:
> +    /// \brief The identifier and whether it is "interesting".
> +    typedef std::pair<StringRef, bool> data_type;
> +
> +    data_type ReadData(const internal_key_type& k,
> +                       const unsigned char* d,
> +                       unsigned DataLen) {
> +      // The first bit indicates whether this identifier is interesting.
> +      // That's all we care about.
> +      using namespace clang::io;
> +      unsigned RawID = ReadUnalignedLE32(d);
> +      bool IsInteresting = RawID & 0x01;
> +      return std::make_pair(k, IsInteresting);
> +    }
> +  };
> +}
> +
> +bool GlobalModuleIndexBuilder::loadModuleFile(const FileEntry *File) {
> +  // Open the module file.
> +  OwningPtr<llvm::MemoryBuffer> Buffer;
> +  Buffer.reset(FileMgr.getBufferForFile(File));
> +  if (!Buffer) {
> +    return true;
> +  }
> +
> +  // Initialize the input stream
> +  llvm::BitstreamReader InStreamFile;
> +  llvm::BitstreamCursor InStream;
> +  InStreamFile.init((const unsigned char *)Buffer->getBufferStart(),
> +                  (const unsigned char *)Buffer->getBufferEnd());
> +  InStream.init(InStreamFile);
> +
> +  // Sniff for the signature.
> +  if (InStream.Read(8) != 'C' ||
> +      InStream.Read(8) != 'P' ||
> +      InStream.Read(8) != 'C' ||
> +      InStream.Read(8) != 'H') {
> +    return true;
> +  }
> +
> +  // Record this module file and assign it a unique ID (if it doesn't have
> +  // one already).
> +  unsigned ID = getModuleFileInfo(File).ID;
> +
> +  // Search for the blocks and records we care about.
> +  enum { Outer, ControlBlock, ASTBlock } State = Outer;
> +  bool Done = false;
> +  while (!Done) {
> +    const unsigned Flags = llvm::BitstreamCursor::AF_DontPopBlockAtEnd;
> +    llvm::BitstreamEntry Entry = InStream.advance(Flags);
> +    switch (Entry.Kind) {
> +    case llvm::BitstreamEntry::Error:
> +      return true;
> +
> +    case llvm::BitstreamEntry::Record:
> +      // In the outer state, just skip the record. We don't care.
> +      if (State == Outer) {
> +        InStream.skipRecord(Entry.ID);
> +        continue;
> +      }
> +
> +      // Handle potentially-interesting records below.
> +      break;
> +
> +    case llvm::BitstreamEntry::SubBlock:
> +      if (State == Outer && Entry.ID == CONTROL_BLOCK_ID) {
> +        if (InStream.EnterSubBlock(CONTROL_BLOCK_ID))
> +          return true;
> +
> +        // Found the control block.
> +        State = ControlBlock;
> +        continue;
> +      }
> +
> +      if (State == Outer && Entry.ID == AST_BLOCK_ID) {
> +        if (InStream.EnterSubBlock(AST_BLOCK_ID))
> +          return true;
> +
> +        // Found the AST block.
> +        State = ASTBlock;
> +        continue;
> +
> +      }
> +
> +      if (InStream.SkipBlock())
> +        return true;
> +
> +      continue;
> +
> +    case llvm::BitstreamEntry::EndBlock:
> +      if (State == Outer) {
> +        Done = true;
> +      }
> +      State = Outer;
> +      continue;
> +    }
> +
> +    // Read the given record.
> +    SmallVector<uint64_t, 64> Record;
> +    StringRef Blob;
> +    unsigned Code = InStream.readRecord(Entry.ID, Record, &Blob);
> +
> +    // Handle module dependencies.
> +    if (State == ControlBlock && Code == IMPORTS) {
> +      // Load each of the imported PCH files.
> +      unsigned Idx = 0, N = Record.size();
> +      while (Idx < N) {
> +        // Read information about the AST file.
> +
> +        // Skip the imported kind
> +        ++Idx;
> +
> +        // Skip the import location
> +        ++Idx;
> +
> +        // Retrieve the imported file name.
> +        unsigned Length = Record[Idx++];
> +        SmallString<128> ImportedFile(Record.begin() + Idx,
> +                                      Record.begin() + Idx + Length);
> +        Idx += Length;
> +
> +        // Find the imported module file.
> +        const FileEntry *DependsOnFile = FileMgr.getFile(ImportedFile);
> +        if (!DependsOnFile)
> +          return true;
> +
> +        // Record the dependency.
> +        unsigned DependsOnID = getModuleFileInfo(DependsOnFile).ID;
> +        getModuleFileInfo(File).Dependencies.push_back(DependsOnID);
> +      }
> +
> +      continue;
> +    }
> +
> +    // Handle the identifier table
> +    if (State == ASTBlock && Code == IDENTIFIER_TABLE && Record[0] > 0) {
> +      typedef OnDiskChainedHashTable<InterestingASTIdentifierLookupTrait>
> +        InterestingIdentifierTable;
> +      llvm::OwningPtr<InterestingIdentifierTable>
> +        Table(InterestingIdentifierTable::Create(
> +                (const unsigned char *)Blob.data() + Record[0],
> +                (const unsigned char *)Blob.data()));
> +      for (InterestingIdentifierTable::data_iterator D = Table->data_begin(),
> +                                                     DEnd = Table->data_end();
> +           D != DEnd; ++D) {
> +        std::pair<StringRef, bool> Ident = *D;
> +        if (Ident.second)
> +          InterestingIdentifiers[Ident.first].push_back(ID);
> +      }
> +    }
> +
> +    // FIXME: Handle the selector table.
> +
> +    // We don't care about this record.
> +  }
> +
> +  return false;
> +}
> +
> +namespace {
> +
> +/// \brief Trait used to generate the identifier index as an on-disk hash
> +/// table.
> +class IdentifierIndexWriterTrait {
> +public:
> +  typedef StringRef key_type;
> +  typedef StringRef key_type_ref;
> +  typedef SmallVector<unsigned, 2> data_type;
> +  typedef const SmallVector<unsigned, 2> &data_type_ref;
> +
> +  static unsigned ComputeHash(key_type_ref Key) {
> +    return llvm::HashString(Key);
> +  }
> +
> +  std::pair<unsigned,unsigned>
> +  EmitKeyDataLength(raw_ostream& Out, key_type_ref Key, data_type_ref Data) {
> +    unsigned KeyLen = Key.size();
> +    unsigned DataLen = Data.size() * 4;
> +    clang::io::Emit16(Out, KeyLen);
> +    clang::io::Emit16(Out, DataLen);
> +    return std::make_pair(KeyLen, DataLen);
> +  }
> +
> +  void EmitKey(raw_ostream& Out, key_type_ref Key, unsigned KeyLen) {
> +    Out.write(Key.data(), KeyLen);
> +  }
> +
> +  void EmitData(raw_ostream& Out, key_type_ref Key, data_type_ref Data,
> +                unsigned DataLen) {
> +    for (unsigned I = 0, N = Data.size(); I != N; ++I)
> +      clang::io::Emit32(Out, Data[I]);
> +  }
> +};
> +
> +}
> +
> +void GlobalModuleIndexBuilder::writeIndex(llvm::BitstreamWriter &Stream) {
> +  using namespace llvm;
> +
> +  // Emit the file header.
> +  Stream.Emit((unsigned)'B', 8);
> +  Stream.Emit((unsigned)'C', 8);
> +  Stream.Emit((unsigned)'G', 8);
> +  Stream.Emit((unsigned)'I', 8);
> +
> +  // Write the block-info block, which describes the records in this bitcode
> +  // file.
> +  emitBlockInfoBlock(Stream);
> +
> +  Stream.EnterSubblock(GLOBAL_INDEX_BLOCK_ID, 3);
> +
> +  // Write the metadata.
> +  SmallVector<uint64_t, 2> Record;
> +  Record.push_back(CurrentVersion);
> +  Stream.EmitRecord(METADATA, Record);
> +
> +  // Write the set of known module files.
> +  for (ModuleFilesMap::iterator M = ModuleFiles.begin(),
> +                                MEnd = ModuleFiles.end();
> +       M != MEnd; ++M) {
> +    Record.clear();
> +    Record.push_back(M->second.ID);
> +    Record.push_back(M->first->getSize());
> +    Record.push_back(M->first->getModificationTime());
> +
> +    // File name
> +    StringRef Name(M->first->getName());
> +    Record.push_back(Name.size());
> +    Record.append(Name.begin(), Name.end());
> +
> +    // Dependencies
> +    Record.push_back(M->second.Dependencies.size());
> +    Record.append(M->second.Dependencies.begin(), M->second.Dependencies.end());
> +    Stream.EmitRecord(MODULE, Record);
> +  }
> +
> +  // Write the identifier -> module file mapping.
> +  {
> +    OnDiskChainedHashTableGenerator<IdentifierIndexWriterTrait> Generator;
> +    IdentifierIndexWriterTrait Trait;
> +
> +    // Populate the hash table.
> +    for (InterestingIdentifierMap::iterator I = InterestingIdentifiers.begin(),
> +                                            IEnd = InterestingIdentifiers.end();
> +         I != IEnd; ++I) {
> +      Generator.insert(I->first(), I->second, Trait);
> +    }
> +
> +    // Create the on-disk hash table in a buffer.
> +    SmallString<4096> IdentifierTable;
> +    uint32_t BucketOffset;
> +    {
> +      llvm::raw_svector_ostream Out(IdentifierTable);
> +      // Make sure that no bucket is at offset 0
> +      clang::io::Emit32(Out, 0);
> +      BucketOffset = Generator.Emit(Out, Trait);
> +    }
> +
> +    // Create a blob abbreviation
> +    BitCodeAbbrev *Abbrev = new BitCodeAbbrev();
> +    Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_INDEX));
> +    Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
> +    Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
> +    unsigned IDTableAbbrev = Stream.EmitAbbrev(Abbrev);
> +
> +    // Write the identifier table
> +    Record.clear();
> +    Record.push_back(IDENTIFIER_INDEX);
> +    Record.push_back(BucketOffset);
> +    Stream.EmitRecordWithBlob(IDTableAbbrev, Record, IdentifierTable.str());
> +  }
> +
> +  // FIXME: Selectors.
> +
> +  Stream.ExitBlock();
> +}
> +
> +GlobalModuleIndex::ErrorCode
> +GlobalModuleIndex::writeIndex(FileManager &FileMgr, StringRef Path) {
> +  llvm::SmallString<128> IndexPath;
> +  IndexPath += Path;
> +  llvm::sys::path::append(IndexPath, IndexFileName);
> +
> +  // Coordinate building the global index file with other processes that might
> +  // try to do the same.
> +  llvm::LockFileManager Locked(IndexPath);
> +  switch (Locked) {
> +  case llvm::LockFileManager::LFS_Error:
> +    return EC_IOError;
> +
> +  case llvm::LockFileManager::LFS_Owned:
> +    // We're responsible for building the index ourselves. Do so below.
> +    break;
> +
> +  case llvm::LockFileManager::LFS_Shared:
> +    // Someone else is responsible for building the index. We don't care
> +    // when they finish, so we're done.
> +    return EC_Building;
> +  }
> +
> +  // The module index builder.
> +  GlobalModuleIndexBuilder Builder(FileMgr);
> +
> +  // Load each of the module files.
> +  llvm::error_code EC;
> +  for (llvm::sys::fs::directory_iterator D(Path, EC), DEnd;
> +       D != DEnd && !EC;
> +       D.increment(EC)) {
> +    // If this isn't a module file, we don't care.
> +    if (llvm::sys::path::extension(D->path()) != ".pcm") {
> +      // ... unless it's a .pcm.lock file, which indicates that someone is
> +      // in the process of rebuilding a module. They'll rebuild the index
> +      // at the end of that translation unit, so we don't have to.
> +      if (llvm::sys::path::extension(D->path()) == ".pcm.lock")
> +        return EC_Building;
> +
> +      continue;
> +    }
> +
> +    // If we can't find the module file, skip it.
> +    const FileEntry *ModuleFile = FileMgr.getFile(D->path());
> +    if (!ModuleFile)
> +      continue;
> +
> +    // Load this module file.
> +    if (Builder.loadModuleFile(ModuleFile))
> +      return EC_IOError;
> +  }
> +
> +  // The output buffer, into which the global index will be written.
> +  SmallVector<char, 16> OutputBuffer;
> +  {
> +    llvm::BitstreamWriter OutputStream(OutputBuffer);
> +    Builder.writeIndex(OutputStream);
> +  }
> +
> +  // Write the global index file to a temporary file.
> +  llvm::SmallString<128> IndexTmpPath;
> +  int TmpFD;
> +  if (llvm::sys::fs::unique_file(IndexPath + "-%%%%%%%%", TmpFD, IndexTmpPath))
> +    return EC_IOError;
> +
> +  // Open the temporary global index file for output.
> +  std::string ErrorInfo;
> +  llvm::raw_fd_ostream Out(IndexTmpPath.c_str(), ErrorInfo,
> +                           llvm::raw_fd_ostream::F_Binary);

This second open of the temporary file fails on Win32, because the descriptor
returned by unique_file() (TmpFD) is still open at this point.
Tweaked in r173330.

> +  if (Out.has_error())
> +    return EC_IOError;
> +
> +  // Write the index.
> +  Out.write(OutputBuffer.data(), OutputBuffer.size());
> +  Out.close();
> +  if (Out.has_error())
> +    return EC_IOError;
> +
> +  // Remove the old index file. It isn't relevant any more.
> +  bool OldIndexExisted;
> +  llvm::sys::fs::remove(IndexPath.str(), OldIndexExisted);
> +
> +  // Rename the newly-written index file to the proper name.
> +  if (llvm::sys::fs::rename(IndexTmpPath.str(), IndexPath.str())) {
> +    // Rename failed; just remove the temporary index file.
> +    llvm::sys::fs::remove(IndexTmpPath.str(), OldIndexExisted);
> +    return EC_IOError;
> +  }
> +
> +  // We're done.
> +  return EC_None;
> +}



More information about the cfe-commits mailing list