[clang] 4da126c - [index-while-building] PathIndexer
Jan Korous via cfe-commits
cfe-commits at lists.llvm.org
Wed Aug 19 11:25:42 PDT 2020
Author: Jan Korous
Date: 2020-08-19T11:25:21-07:00
New Revision: 4da126c3748f3b6c6251e45614b12d3aa118d047
URL: https://github.com/llvm/llvm-project/commit/4da126c3748f3b6c6251e45614b12d3aa118d047
DIFF: https://github.com/llvm/llvm-project/commit/4da126c3748f3b6c6251e45614b12d3aa118d047.diff
LOG: [index-while-building] PathIndexer
Differential Revision: https://reviews.llvm.org/D66854
Added:
clang/include/clang/IndexSerialization/SerializablePathCollection.h
clang/lib/IndexSerialization/CMakeLists.txt
clang/lib/IndexSerialization/SerializablePathCollection.cpp
Modified:
clang/lib/CMakeLists.txt
Removed:
################################################################################
diff --git a/clang/include/clang/IndexSerialization/SerializablePathCollection.h b/clang/include/clang/IndexSerialization/SerializablePathCollection.h
new file mode 100644
index 000000000000..20cf8fbdad96
--- /dev/null
+++ b/clang/include/clang/IndexSerialization/SerializablePathCollection.h
@@ -0,0 +1,129 @@
+//===--- SerializablePathCollection.h -- Index of paths ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_INDEX_SerializablePathCollection_H
+#define LLVM_CLANG_INDEX_SerializablePathCollection_H
+
+#include "clang/Basic/FileManager.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator.h"
+
+#include <string>
+#include <vector>
+
+namespace clang {
+namespace index {
+
+/// Append-only pool of strings kept back to back in a single buffer.
+///
+/// add() appends the characters of a string to the end of the buffer and
+/// hands back the offset/size pair that locates them inside getBuffer().
+class StringPool {
+  llvm::SmallString<512> Buffer;
+
+public:
+  /// Location of one stored string: where it starts in the pool buffer
+  /// (\c Offset) and how many characters it spans (\c Size).
+  struct StringOffsetSize {
+    std::size_t Offset;
+    std::size_t Size;
+
+    StringOffsetSize(std::size_t Offset, std::size_t Size)
+        : Offset(Offset), Size(Size) {}
+  };
+
+  /// Appends \p Str to the end of the pool.
+  /// \returns offset and size of the appended characters.
+  StringOffsetSize add(StringRef Str);
+
+  /// \returns view of all characters added to the pool so far.
+  StringRef getBuffer() const { return Buffer.str(); }
+};
+
+/// Pool of filesystem paths backed by a StringPool.
+///
+/// Directory and file name strings share one character buffer (getPaths()).
+/// Each added file is described by a FilePath record that points at its
+/// directory and its file name through offset/size pairs into that buffer.
+class PathPool {
+public:
+  /// Special root directory of a filesystem path.
+  ///
+  /// As used by SerializablePathCollection, \c Regular marks a directory
+  /// stored as given, while the other two mark a directory stored with the
+  /// working-directory or sysroot prefix removed.
+  enum class RootDirKind {
+    Regular = 0,
+    CurrentWorkDir = 1,
+    SysRoot = 2,
+  };
+
+  /// Directory reference: the kind of root plus the location of the
+  /// directory string in the pool buffer.
+  struct DirPath {
+    RootDirKind Root;
+    StringPool::StringOffsetSize Path;
+
+    DirPath(RootDirKind Root, const StringPool::StringOffsetSize &Path)
+        : Root(Root), Path(Path) {}
+  };
+
+  /// File reference: the containing directory plus the location of the
+  /// file name string in the pool buffer.
+  struct FilePath {
+    DirPath Dir;
+    StringPool::StringOffsetSize Filename;
+
+    FilePath(const DirPath &Dir, const StringPool::StringOffsetSize &Filename)
+        : Dir(Dir), Filename(Filename) {}
+  };
+
+  /// Appends \p Filename to the pool and records a new file located in the
+  /// already stored directory \p Dir with root kind \p Root.
+  /// \returns index of the newly added file in FilePaths.
+  std::size_t addFilePath(RootDirKind Root,
+                          const StringPool::StringOffsetSize &Dir,
+                          StringRef Filename);
+
+  /// Appends \p Dir to the pool.
+  /// \returns offset in Paths and size of newly added directory.
+  StringPool::StringOffsetSize addDirPath(StringRef Dir);
+
+  /// \returns all file records added so far, in insertion order.
+  llvm::ArrayRef<FilePath> getFilePaths() const;
+
+  /// \returns the character buffer that the offset/size pairs refer to.
+  StringRef getPaths() const;
+
+private:
+  StringPool Paths;
+  std::vector<FilePath> FilePaths;
+};
+
+/// Stores file paths and produces serialization-friendly representation.
+///
+/// All path strings end up in one buffer (getPathsBuffer()); files are
+/// exposed as records (getFilePaths()) that refer into that buffer. Files are
+/// deduplicated by FileEntry pointer and directories by their path string.
+class SerializablePathCollection {
+  std::string WorkDir;
+  std::string SysRoot;
+
+  // Keep Paths declared ahead of the public *Path members below: their
+  // initializers in the constructor call Paths.addDirPath(), and members are
+  // initialized in declaration order.
+  PathPool Paths;
+  llvm::DenseMap<const clang::FileEntry *, std::size_t> UniqueFiles;
+  llvm::StringMap<PathPool::DirPath, llvm::BumpPtrAllocator> UniqueDirs;
+
+public:
+  /// Locations, inside the buffer exposed by getPathsBuffer(), of the
+  /// strings passed to the constructor.
+  const StringPool::StringOffsetSize WorkDirPath;
+  const StringPool::StringOffsetSize SysRootPath;
+  const StringPool::StringOffsetSize OutputFilePath;
+
+  /// Stores \p CurrentWorkDir, \p SysRoot and \p OutputFile as the first
+  /// three strings of the paths buffer.
+  SerializablePathCollection(llvm::StringRef CurrentWorkDir,
+                             llvm::StringRef SysRoot,
+                             llvm::StringRef OutputFile);
+
+  /// \returns buffer containing all the paths.
+  llvm::StringRef getPathsBuffer() const { return Paths.getPaths(); }
+
+  /// \returns file paths (no directories) backed by buffer exposed in
+  /// getPathsBuffer.
+  llvm::ArrayRef<PathPool::FilePath> getFilePaths() const {
+    return Paths.getFilePaths();
+  }
+
+  /// Stores path to \p FE if it hasn't been stored yet.
+  /// \returns index of the file in the array exposed by getFilePaths().
+  size_t tryStoreFilePath(const clang::FileEntry &FE);
+
+private:
+  /// Stores \p Path if it is non-empty.
+  /// Warning: this method doesn't check for uniqueness.
+  /// \returns offset of \p Path value begin in buffer with stored paths.
+  // NOTE(review): no definition of storePath is part of this change; confirm
+  // whether it is still needed.
+  StringPool::StringOffsetSize storePath(llvm::StringRef Path);
+
+  /// Stores \p Dir path if it hasn't been stored yet.
+  /// \returns root kind and buffer location of the stored directory.
+  PathPool::DirPath tryStoreDirPath(llvm::StringRef Dir);
+};
+
+} // namespace index
+} // namespace clang
+
+#endif // LLVM_CLANG_INDEX_SerializablePathCollection_H
diff --git a/clang/lib/CMakeLists.txt b/clang/lib/CMakeLists.txt
index c2b6a5a4d5d4..23082789ff9a 100644
--- a/clang/lib/CMakeLists.txt
+++ b/clang/lib/CMakeLists.txt
@@ -20,6 +20,7 @@ add_subdirectory(FrontendTool)
add_subdirectory(Tooling)
add_subdirectory(DirectoryWatcher)
add_subdirectory(Index)
+add_subdirectory(IndexSerialization)
if(CLANG_ENABLE_STATIC_ANALYZER)
add_subdirectory(StaticAnalyzer)
endif()
diff --git a/clang/lib/IndexSerialization/CMakeLists.txt b/clang/lib/IndexSerialization/CMakeLists.txt
new file mode 100644
index 000000000000..197059fff4b3
--- /dev/null
+++ b/clang/lib/IndexSerialization/CMakeLists.txt
@@ -0,0 +1,6 @@
+# SerializablePathCollection.cpp calls llvm::sys::path functions directly, so
+# the LLVM Support component has to be linked explicitly rather than only
+# transitively through clangBasic (required for BUILD_SHARED_LIBS builds).
+set(LLVM_LINK_COMPONENTS
+  Support
+  )
+
+add_clang_library(clangIndexSerialization
+  SerializablePathCollection.cpp
+
+  LINK_LIBS
+  clangBasic
+  )
diff --git a/clang/lib/IndexSerialization/SerializablePathCollection.cpp b/clang/lib/IndexSerialization/SerializablePathCollection.cpp
new file mode 100644
index 000000000000..34663738088e
--- /dev/null
+++ b/clang/lib/IndexSerialization/SerializablePathCollection.cpp
@@ -0,0 +1,91 @@
+//===--- SerializablePathCollection.cpp -- Index of paths -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/IndexSerialization/SerializablePathCollection.h"
+#include "llvm/Support/Path.h"
+
+using namespace llvm;
+using namespace clang;
+using namespace clang::index;
+
+/// Appends \p Str to the end of the pool buffer.
+/// \returns offset at which \p Str was placed and its size.
+StringPool::StringOffsetSize StringPool::add(StringRef Str) {
+  // The new string starts exactly where the buffer currently ends, so the
+  // location has to be captured before appending.
+  const StringOffsetSize Location(Buffer.size(), Str.size());
+  Buffer.append(Str);
+  return Location;
+}
+
+/// Appends \p Filename to the pool and records a file that lives in the
+/// already stored directory \p Dir with root kind \p Root.
+/// \returns index of the new record in FilePaths.
+size_t PathPool::addFilePath(RootDirKind Root,
+                             const StringPool::StringOffsetSize &Dir,
+                             StringRef Filename) {
+  // The record is appended at the end, so its index is the current size.
+  const size_t NewIndex = FilePaths.size();
+  const DirPath ParentDir(Root, Dir);
+  FilePaths.emplace_back(ParentDir, Paths.add(Filename));
+  return NewIndex;
+}
+
+/// Appends the directory string \p Dir to the pool; no uniqueness check is
+/// done here.
+/// \returns offset and size of the stored string.
+StringPool::StringOffsetSize PathPool::addDirPath(StringRef Dir) {
+  const StringPool::StringOffsetSize Stored = Paths.add(Dir);
+  return Stored;
+}
+
+/// \returns non-owning view of every file record added so far.
+llvm::ArrayRef<PathPool::FilePath> PathPool::getFilePaths() const {
+  return llvm::ArrayRef<PathPool::FilePath>(FilePaths);
+}
+
+/// \returns the character buffer holding all stored directory and file name
+/// strings.
+StringRef PathPool::getPaths() const {
+  const StringRef AllPaths = Paths.getBuffer();
+  return AllPaths;
+}
+
+/// Remembers the working directory and sysroot used for prefix matching and
+/// stores \p CurrentWorkDir, \p SysRoot and \p OutputFile in the path pool, in
+/// that order.
+SerializablePathCollection::SerializablePathCollection(
+    StringRef CurrentWorkDir, StringRef SysRoot, llvm::StringRef OutputFile)
+    : WorkDir(CurrentWorkDir),
+      // A sysroot that has no parent path is not kept as a prefix: the
+      // member stays empty in that case.
+      SysRoot(llvm::sys::path::has_parent_path(SysRoot) ? SysRoot
+                                                        : StringRef()),
+      WorkDirPath(Paths.addDirPath(WorkDir)),
+      // `SysRoot` below names the constructor parameter (it shadows the
+      // member), so the string is stored as passed in, unfiltered.
+      // NOTE(review): confirm this is intended rather than the member.
+      SysRootPath(Paths.addDirPath(SysRoot)),
+      OutputFilePath(Paths.addDirPath(OutputFile)) {}
+
+/// Stores the path of \p FE unless the same FileEntry was stored before.
+/// \returns index of the file's record in the array of file paths.
+size_t SerializablePathCollection::tryStoreFilePath(const FileEntry &FE) {
+  // Files are deduplicated by FileEntry address.
+  const auto Known = UniqueFiles.find(&FE);
+  if (Known != UniqueFiles.end())
+    return Known->second;
+
+  // Split the name into directory and file name; the directory goes through
+  // its own deduplication.
+  const PathPool::DirPath ParentDir =
+      tryStoreDirPath(sys::path::parent_path(FE.getName()));
+  const StringRef BaseName = sys::path::filename(FE.getName());
+  const size_t NewIndex =
+      Paths.addFilePath(ParentDir.Root, ParentDir.Path, BaseName);
+
+  UniqueFiles.try_emplace(&FE, NewIndex);
+  return NewIndex;
+}
+
+/// Stores the directory \p Dir unless it has been stored before.
+///
+/// Trailing separators are ignored. A directory that is the sysroot or the
+/// working directory itself, or lies below one of them, is stored relative to
+/// that root (sysroot is checked first) and tagged with the matching
+/// RootDirKind; any other directory is stored unchanged as Regular.
+///
+/// \returns root kind and buffer location of the stored directory.
+PathPool::DirPath SerializablePathCollection::tryStoreDirPath(StringRef Dir) {
+  // We don't want to strip separator if Dir is "/" - so we check size > 1.
+  while (Dir.size() > 1 && llvm::sys::path::is_separator(Dir.back()))
+    Dir = Dir.drop_back();
+
+  auto DirIt = UniqueDirs.find(Dir);
+  if (DirIt != UniqueDirs.end())
+    return DirIt->second;
+
+  // Dir is trimmed below; the cache key must be the untrimmed spelling.
+  const std::string OrigDir = Dir.str();
+
+  // True if Dir is Prefix itself or a path below it. The size comparison
+  // comes first so that Dir is never indexed past its end when it is exactly
+  // equal to Prefix (e.g. a file located directly in the working directory).
+  const auto IsInside = [&Dir](StringRef Prefix) {
+    if (Prefix.empty() || !Dir.startswith(Prefix))
+      return false;
+    return Dir.size() == Prefix.size() ||
+           llvm::sys::path::is_separator(Dir[Prefix.size()]);
+  };
+
+  PathPool::RootDirKind Root = PathPool::RootDirKind::Regular;
+  if (IsInside(SysRoot)) {
+    Root = PathPool::RootDirKind::SysRoot;
+    Dir = Dir.drop_front(SysRoot.size());
+  } else if (IsInside(WorkDir)) {
+    Root = PathPool::RootDirKind::CurrentWorkDir;
+    Dir = Dir.drop_front(WorkDir.size());
+  }
+
+  // After removing a root prefix the remainder is made relative by dropping
+  // the separators that followed the prefix.
+  if (Root != PathPool::RootDirKind::Regular) {
+    while (!Dir.empty() && llvm::sys::path::is_separator(Dir.front()))
+      Dir = Dir.drop_front();
+  }
+
+  PathPool::DirPath Result(Root, Paths.addDirPath(Dir));
+  UniqueDirs.try_emplace(OrigDir, Result);
+  return Result;
+}
More information about the cfe-commits
mailing list