[clang] 4da126c - [index-while-building] PathIndexer

Jan Korous via cfe-commits cfe-commits at lists.llvm.org
Wed Aug 19 11:25:42 PDT 2020


Author: Jan Korous
Date: 2020-08-19T11:25:21-07:00
New Revision: 4da126c3748f3b6c6251e45614b12d3aa118d047

URL: https://github.com/llvm/llvm-project/commit/4da126c3748f3b6c6251e45614b12d3aa118d047
DIFF: https://github.com/llvm/llvm-project/commit/4da126c3748f3b6c6251e45614b12d3aa118d047.diff

LOG: [index-while-building] PathIndexer

Differential Revision: https://reviews.llvm.org/D66854

Added: 
    clang/include/clang/IndexSerialization/SerializablePathCollection.h
    clang/lib/IndexSerialization/CMakeLists.txt
    clang/lib/IndexSerialization/SerializablePathCollection.cpp

Modified: 
    clang/lib/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/IndexSerialization/SerializablePathCollection.h b/clang/include/clang/IndexSerialization/SerializablePathCollection.h
new file mode 100644
index 000000000000..20cf8fbdad96
--- /dev/null
+++ b/clang/include/clang/IndexSerialization/SerializablePathCollection.h
@@ -0,0 +1,129 @@
+//===--- SerializablePathCollection.h -- Index of paths ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_INDEX_SerializablePathCollection_H
+#define LLVM_CLANG_INDEX_SerializablePathCollection_H
+
+#include "clang/Basic/FileManager.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator.h"
+
+#include <string>
+#include <vector>
+
+namespace clang {
+namespace index {
+
+/// Pool of strings stored back to back (no separators) in a single buffer.
+class StringPool {
+  llvm::SmallString<512> Buffer; // Every added string, concatenated in order.
+
+public:
+  struct StringOffsetSize {
+    std::size_t Offset; // Index of the string's first byte in the buffer.
+    std::size_t Size;   // Length of the string in bytes.
+
+    StringOffsetSize(size_t Offset, size_t Size) : Offset(Offset), Size(Size) {}
+  };
+
+  StringOffsetSize add(StringRef Str); // Appends Str; gives its offset/size.
+  StringRef getBuffer() const { return Buffer; } // View of the whole buffer.
+};
+
+/// Pool of filesystem paths whose strings are all kept in one StringPool.
+class PathPool {
+public:
+  /// Special root directory of a filesystem path.
+  enum class RootDirKind {
+    Regular = 0,        // Directory is not relative to a special root.
+    CurrentWorkDir = 1, // Directory is relative to the working directory.
+    SysRoot = 2,        // Directory is relative to the sysroot.
+  };
+
+  struct DirPath {
+    RootDirKind Root;                  // What Path is relative to.
+    StringPool::StringOffsetSize Path; // Directory string within getPaths().
+
+    DirPath(RootDirKind Root, const StringPool::StringOffsetSize &Path)
+        : Root(Root), Path(Path) {}
+  };
+
+  struct FilePath {
+    DirPath Dir;                           // Directory part of the file path.
+    StringPool::StringOffsetSize Filename; // File name within getPaths().
+
+    FilePath(const DirPath &Dir, const StringPool::StringOffsetSize &Filename)
+        : Dir(Dir), Filename(Filename) {}
+  };
+
+  /// Stores \p Filename. \returns index of the newly added file in FilePaths.
+  size_t addFilePath(RootDirKind Root, const StringPool::StringOffsetSize &Dir,
+                     StringRef Filename);
+
+  /// Stores \p Dir. \returns offset in Paths and size of the added directory.
+  StringPool::StringOffsetSize addDirPath(StringRef Dir);
+
+  llvm::ArrayRef<FilePath> getFilePaths() const; // All files, in added order.
+
+  StringRef getPaths() const; // Buffer holding every pooled string.
+
+private:
+  StringPool Paths;                // Directory and file name strings.
+  std::vector<FilePath> FilePaths; // One entry per addFilePath() call.
+};
+
+/// Stores file paths and produces serialization-friendly representation.
+class SerializablePathCollection {
+  std::string WorkDir; // Prefix that marks a directory as CurrentWorkDir.
+  std::string SysRoot; // Prefix for SysRoot; empty if it had no parent path.
+
+  PathPool Paths; // Backing storage for every path handed out below.
+  llvm::DenseMap<const clang::FileEntry *, std::size_t> UniqueFiles;
+  llvm::StringMap<PathPool::DirPath, llvm::BumpPtrAllocator> UniqueDirs;
+
+public:
+  const StringPool::StringOffsetSize WorkDirPath; // Pooled working directory.
+  const StringPool::StringOffsetSize SysRootPath; // Pooled sysroot as passed in.
+  const StringPool::StringOffsetSize OutputFilePath; // Pooled output file.
+
+  SerializablePathCollection(llvm::StringRef CurrentWorkDir,
+                             llvm::StringRef SysRoot,
+                             llvm::StringRef OutputFile);
+
+  /// \returns buffer containing all the paths.
+  llvm::StringRef getPathsBuffer() const { return Paths.getPaths(); }
+
+  /// \returns file paths (no directories) backed by buffer exposed in
+  /// getPathsBuffer.
+  ArrayRef<PathPool::FilePath> getFilePaths() const {
+    return Paths.getFilePaths();
+  }
+
+  /// Stores path to \p FE if it hasn't been stored yet.
+  /// \returns index to array exposed by getFilePaths().
+  size_t tryStoreFilePath(const clang::FileEntry &FE);
+
+private:
+  /// Stores \p Path if it is non-empty, without checking for uniqueness.
+  /// \returns offset and size of \p Path in the buffer with stored paths.
+  /// NOTE(review): no definition is visible in SerializablePathCollection.cpp.
+  StringPool::StringOffsetSize storePath(llvm::StringRef Path);
+
+  /// Stores \p dirStr if it hasn't been stored yet; \returns its pooled entry.
+  PathPool::DirPath tryStoreDirPath(llvm::StringRef dirStr);
+};
+
+} // namespace index
+} // namespace clang
+
+#endif // LLVM_CLANG_INDEX_SerializablePathCollection_H

diff  --git a/clang/lib/CMakeLists.txt b/clang/lib/CMakeLists.txt
index c2b6a5a4d5d4..23082789ff9a 100644
--- a/clang/lib/CMakeLists.txt
+++ b/clang/lib/CMakeLists.txt
@@ -20,6 +20,7 @@ add_subdirectory(FrontendTool)
 add_subdirectory(Tooling)
 add_subdirectory(DirectoryWatcher)
 add_subdirectory(Index)
+add_subdirectory(IndexSerialization)
 if(CLANG_ENABLE_STATIC_ANALYZER)
   add_subdirectory(StaticAnalyzer)
 endif()

diff  --git a/clang/lib/IndexSerialization/CMakeLists.txt b/clang/lib/IndexSerialization/CMakeLists.txt
new file mode 100644
index 000000000000..197059fff4b3
--- /dev/null
+++ b/clang/lib/IndexSerialization/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_clang_library(clangIndexSerialization
+  SerializablePathCollection.cpp
+
+  LINK_LIBS
+  clangBasic # clang/Basic/FileManager.h is included by the path collection.
+  )

diff  --git a/clang/lib/IndexSerialization/SerializablePathCollection.cpp b/clang/lib/IndexSerialization/SerializablePathCollection.cpp
new file mode 100644
index 000000000000..34663738088e
--- /dev/null
+++ b/clang/lib/IndexSerialization/SerializablePathCollection.cpp
@@ -0,0 +1,91 @@
+//===--- SerializablePathCollection.cpp -- Index of paths -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/IndexSerialization/SerializablePathCollection.h"
+#include "llvm/Support/Path.h"
+
+using namespace llvm;
+using namespace clang;
+using namespace clang::index;
+
+StringPool::StringOffsetSize StringPool::add(StringRef Str) {
+  const StringOffsetSize Added(Buffer.size(), Str.size()); // Starts at old end.
+  Buffer.append(Str);
+  return Added;
+}
+
+size_t PathPool::addFilePath(RootDirKind Root,
+                             const StringPool::StringOffsetSize &Dir,
+                             StringRef Filename) {
+  FilePaths.push_back(FilePath(DirPath(Root, Dir), Paths.add(Filename)));
+  return FilePaths.size() - 1; // Index of the entry appended just above.
+}
+
+StringPool::StringOffsetSize PathPool::addDirPath(StringRef Dir) {
+  return Paths.add(Dir); // Appended as-is; no uniqueness check happens here.
+}
+
+llvm::ArrayRef<PathPool::FilePath> PathPool::getFilePaths() const {
+  return FilePaths; // Non-owning view; a later addFilePath may invalidate it.
+}
+
+StringRef PathPool::getPaths() const { return Paths.getBuffer(); } // All paths.
+
+SerializablePathCollection::SerializablePathCollection(
+    StringRef CurrentWorkDir, StringRef SysRoot, llvm::StringRef OutputFile)
+    : WorkDir(CurrentWorkDir),
+      SysRoot(llvm::sys::path::parent_path(SysRoot).empty() ? StringRef()
+                                                            : SysRoot),
+      WorkDirPath(Paths.addDirPath(WorkDir)), // Paths is declared earlier.
+      SysRootPath(Paths.addDirPath(SysRoot)), // NOTE: parameter, not member.
+      OutputFilePath(Paths.addDirPath(OutputFile)) {} // Pooled like a dir.
+
+size_t SerializablePathCollection::tryStoreFilePath(const FileEntry &FE) {
+  // A file that was already stored keeps the index it was first given.
+  const auto Known = UniqueFiles.find(&FE);
+  if (Known != UniqueFiles.end())
+    return Known->second;
+
+  const StringRef Name = FE.getName();
+  const auto Parent = tryStoreDirPath(sys::path::parent_path(Name));
+  const size_t Idx =
+      Paths.addFilePath(Parent.Root, Parent.Path, sys::path::filename(Name));
+  return UniqueFiles.try_emplace(&FE, Idx).first->second;
+}
+
+PathPool::DirPath SerializablePathCollection::tryStoreDirPath(StringRef Dir) {
+  // We don't want to strip separator if Dir is "/" - so we check size > 1.
+  while (Dir.size() > 1 && llvm::sys::path::is_separator(Dir.back()))
+    Dir = Dir.drop_back();
+
+  auto DirIt = UniqueDirs.find(Dir);
+  if (DirIt != UniqueDirs.end())
+    return DirIt->second;
+
+  const std::string OrigDir = Dir.str(); // Cache key; Dir is trimmed below.
+  PathPool::RootDirKind Root = PathPool::RootDirKind::Regular;
+  // Dir may equal a root exactly, so compare sizes before indexing past it.
+  if (!SysRoot.empty() && Dir.startswith(SysRoot) &&
+      (Dir.size() == SysRoot.size() ||
+       llvm::sys::path::is_separator(Dir[SysRoot.size()]))) {
+    Root = PathPool::RootDirKind::SysRoot;
+    Dir = Dir.drop_front(SysRoot.size());
+  } else if (!WorkDir.empty() && Dir.startswith(WorkDir) &&
+             (Dir.size() == WorkDir.size() ||
+              llvm::sys::path::is_separator(Dir[WorkDir.size()]))) {
+    Root = PathPool::RootDirKind::CurrentWorkDir;
+    Dir = Dir.drop_front(WorkDir.size());
+  }
+  while (Root != PathPool::RootDirKind::Regular && !Dir.empty() &&
+         llvm::sys::path::is_separator(Dir.front()))
+    Dir = Dir.drop_front(); // Relative paths get no leading separator.
+
+  PathPool::DirPath Result(Root, Paths.addDirPath(Dir));
+  UniqueDirs.try_emplace(OrigDir, Result);
+  return Result;
+}


        


More information about the cfe-commits mailing list