[clang-tools-extra] 5614438 - [clangd] Optimize Dex::generateProximityURIs().
Sam McCall via cfe-commits
cfe-commits at lists.llvm.org
Thu Oct 6 08:14:29 PDT 2022
Author: Sam McCall
Date: 2022-10-06T17:01:04+02:00
New Revision: 561443818a15e7b368ddbb58207a14c60ba55c58
URL: https://github.com/llvm/llvm-project/commit/561443818a15e7b368ddbb58207a14c60ba55c58
DIFF: https://github.com/llvm/llvm-project/commit/561443818a15e7b368ddbb58207a14c60ba55c58.diff
LOG: [clangd] Optimize Dex::generateProximityURIs().
Production profiles show that generateProximityURIs is roughly 3.8% of
buildPreamble. Of this, the majority (3% of buildPreamble) is parsing
and reserializing URIs.
We can do this with ugly string manipulation instead.
Differential Revision: https://reviews.llvm.org/D135226
Added:
Modified:
clang-tools-extra/clangd/index/dex/Dex.cpp
clang-tools-extra/clangd/index/dex/Dex.h
Removed:
################################################################################
diff --git a/clang-tools-extra/clangd/index/dex/Dex.cpp b/clang-tools-extra/clangd/index/dex/Dex.cpp
index 2d778c13157f2..c66e7f0d936ce 100644
--- a/clang-tools-extra/clangd/index/dex/Dex.cpp
+++ b/clang-tools-extra/clangd/index/dex/Dex.cpp
@@ -163,8 +163,8 @@ std::unique_ptr<Iterator> Dex::createFileProximityIterator(
llvm::StringMap<SourceParams> Sources;
for (const auto &Path : ProximityPaths) {
Sources[Path] = SourceParams();
- auto PathURI = URI::create(Path);
- const auto PathProximityURIs = generateProximityURIs(PathURI.toString());
+ auto PathURI = URI::create(Path).toString();
+ const auto PathProximityURIs = generateProximityURIs(PathURI.c_str());
for (const auto &ProximityURI : PathProximityURIs)
ParentURIs.insert(ProximityURI);
}
@@ -353,30 +353,49 @@ size_t Dex::estimateMemoryUsage() const {
return Bytes + BackingDataSize;
}
-std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath) {
- std::vector<std::string> Result;
- auto ParsedURI = URI::parse(URIPath);
- assert(ParsedURI &&
- "Non-empty argument of generateProximityURIs() should be a valid "
- "URI.");
- llvm::StringRef Body = ParsedURI->body();
- // FIXME(kbobyrev): Currently, this is a heuristic which defines the maximum
- // size of resulting vector. Some projects might want to have higher limit if
- // the file hierarchy is deeper. For the generic case, it would be useful to
- // calculate Limit in the index build stage by calculating the maximum depth
- // of the project source tree at runtime.
- size_t Limit = 5;
- // Insert original URI before the loop: this would save a redundant iteration
- // with a URI parse.
- Result.emplace_back(ParsedURI->toString());
- while (!Body.empty() && --Limit > 0) {
- // FIXME(kbobyrev): Parsing and encoding path to URIs is not necessary and
- // could be optimized.
- Body = llvm::sys::path::parent_path(Body, llvm::sys::path::Style::posix);
- if (!Body.empty())
- Result.emplace_back(
- URI(ParsedURI->scheme(), ParsedURI->authority(), Body).toString());
+// Given foo://bar/one/two
+// Returns ~~~~~~~~ (or empty for bad URI)
+llvm::StringRef findPathInURI(llvm::StringRef S) {
+ S = S.split(':').second; // Eat scheme.
+ if (S.consume_front("//")) // Eat authority.
+ S = S.drop_until([](char C) { return C == '/'; });
+ return S;
+}
+
+// FIXME(kbobyrev): Currently, this is a heuristic which defines the maximum
+// size of resulting vector. Some projects might want to have higher limit if
+// the file hierarchy is deeper. For the generic case, it would be useful to
+// calculate Limit in the index build stage by calculating the maximum depth
+// of the project source tree at runtime.
+constexpr unsigned ProximityURILimit = 5;
+
+llvm::SmallVector<llvm::StringRef, ProximityURILimit>
+generateProximityURIs(llvm::StringRef URI) {
+ // This function is hot when indexing, so don't parse/reserialize URIPath,
+ // just emit substrings of it instead.
+ //
+ // foo://bar/one/two
+ // ~~~~~~~~
+ // Path
+ llvm::StringRef Path = findPathInURI(URI);
+ if (Path.empty())
+ return {}; // Bad URI.
+ assert(Path.begin() >= URI.begin() && Path.begin() < URI.end() &&
+ Path.end() == URI.end());
+
+ // The original is a proximity URI.
+ llvm::SmallVector<llvm::StringRef, ProximityURILimit> Result = {URI};
+ // Form proximity URIs by chopping before each slash in the path part.
+ for (auto Slash = Path.rfind('/'); Slash > 0 && Slash != StringRef::npos;
+ Slash = Path.rfind('/')) {
+ Path = Path.substr(0, Slash);
+ Result.push_back(URI.substr(0, Path.end() - URI.data()));
+ if (Result.size() == ProximityURILimit)
+ return Result;
}
+ // The root foo://bar/ is a proximity URI.
+ if (Path.startswith("/"))
+ Result.push_back(URI.substr(0, Path.begin() + 1 - URI.data()));
return Result;
}
diff --git a/clang-tools-extra/clangd/index/dex/Dex.h b/clang-tools-extra/clangd/index/dex/Dex.h
index f29e7496946c1..69e161d51135b 100644
--- a/clang-tools-extra/clangd/index/dex/Dex.h
+++ b/clang-tools-extra/clangd/index/dex/Dex.h
@@ -132,7 +132,7 @@ class Dex : public SymbolIndex {
/// Should be used within the index build process.
///
/// This function is exposed for testing only.
-std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath);
+llvm::SmallVector<llvm::StringRef, 5> generateProximityURIs(llvm::StringRef);
} // namespace dex
} // namespace clangd
More information about the cfe-commits
mailing list