[PATCH] D99086: [clang][Syntax] Optimize expandedTokens for token ranges.

Utkarsh Saxena via Phabricator via cfe-commits cfe-commits at lists.llvm.org
Thu Mar 25 10:31:14 PDT 2021


usaxena95 updated this revision to Diff 333344.
usaxena95 marked 4 inline comments as done.
usaxena95 added a comment.

Addressed comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99086/new/

https://reviews.llvm.org/D99086

Files:
  clang-tools-extra/clangd/ParsedAST.cpp
  clang/include/clang/Tooling/Syntax/Tokens.h
  clang/lib/Tooling/Syntax/Tokens.cpp
  clang/unittests/Tooling/Syntax/TokensTest.cpp


Index: clang/unittests/Tooling/Syntax/TokensTest.cpp
===================================================================
--- clang/unittests/Tooling/Syntax/TokensTest.cpp
+++ clang/unittests/Tooling/Syntax/TokensTest.cpp
@@ -106,6 +106,7 @@
       void EndSourceFileAction() override {
         assert(Collector && "BeginSourceFileAction was never called");
         Result = std::move(*Collector).consume();
+        Result.indexExpandedTokens();
       }
 
       std::unique_ptr<ASTConsumer>
Index: clang/lib/Tooling/Syntax/Tokens.cpp
===================================================================
--- clang/lib/Tooling/Syntax/Tokens.cpp
+++ clang/lib/Tooling/Syntax/Tokens.cpp
@@ -183,7 +183,31 @@
   return Text.substr(Begin, length());
 }
 
+void TokenBuffer::indexExpandedTokens() {
+  // No-op if the index is already created.
+  if (!ExpandedTokIndex.empty())
+    return;
+  ExpandedTokIndex.reserve(ExpandedTokens.size());
+  // Index ExpandedTokens for faster lookups by SourceLocation.
+  for (size_t I = 0, E = ExpandedTokens.size(); I != E; ++I)
+    ExpandedTokIndex[ExpandedTokens[I].location()] = I;
+}
+
 llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const {
+  if (!ExpandedTokIndex.empty()) {
+    // Quick lookup if `R` is a token range.
+    // This is a huge win since majority of the users use ranges provided by an
+    // AST. Ranges in AST are token ranges from expanded token stream.
+    const auto B = ExpandedTokIndex.find(R.getBegin());
+    const auto E = ExpandedTokIndex.find(R.getEnd());
+    if (B != ExpandedTokIndex.end() && E != ExpandedTokIndex.end()) {
+      // Add 1 to End to make a half-open range.
+      return {ExpandedTokens.data() + B->getSecond(),
+              ExpandedTokens.data() + E->getSecond() + 1};
+    }
+  }
+  // Slow case. Use `isBeforeInTranslationUnit` to binary search for the
+  // required range.
   return getTokensCovering(expandedTokens(), R, *SourceMgr);
 }
 
Index: clang/include/clang/Tooling/Syntax/Tokens.h
===================================================================
--- clang/include/clang/Tooling/Syntax/Tokens.h
+++ clang/include/clang/Tooling/Syntax/Tokens.h
@@ -34,6 +34,7 @@
 #include "clang/Basic/TokenKinds.h"
 #include "clang/Lex/Token.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Compiler.h"
@@ -192,8 +193,13 @@
     return ExpandedTokens;
   }
 
+  /// Builds a cache to make future calls to expandedToken(SourceRange) faster.
+  /// Creates an index only once. Further calls to it will be no-op.
+  void indexExpandedTokens();
+
   /// Returns the subrange of expandedTokens() corresponding to the closed
   /// token range R.
+  /// Consider calling indexExpandedTokens() before for faster lookups.
   llvm::ArrayRef<syntax::Token> expandedTokens(SourceRange R) const;
 
   /// Returns the subrange of spelled tokens corresponding to AST node spanning
@@ -366,6 +372,8 @@
   /// same stream as 'clang -E' (excluding the preprocessor directives like
   /// #file, etc.).
   std::vector<syntax::Token> ExpandedTokens;
+  // Index of ExpandedTokens for faster lookups by SourceLocation.
+  llvm::DenseMap<SourceLocation, unsigned> ExpandedTokIndex;
   llvm::DenseMap<FileID, MarkedFile> Files;
   // The value is never null, pointer instead of reference to avoid disabling
   // implicit assignment operator.
Index: clang-tools-extra/clangd/ParsedAST.cpp
===================================================================
--- clang-tools-extra/clangd/ParsedAST.cpp
+++ clang-tools-extra/clangd/ParsedAST.cpp
@@ -423,6 +423,8 @@
   // tokens from running the preprocessor inside the checks (only
   // modernize-use-trailing-return-type does that today).
   syntax::TokenBuffer Tokens = std::move(CollectTokens).consume();
+  // Makes SelectionTree build much faster.
+  Tokens.indexExpandedTokens();
   std::vector<Decl *> ParsedDecls = Action->takeTopLevelDecls();
   // AST traversals should exclude the preamble, to avoid performance cliffs.
   Clang->getASTContext().setTraversalScope(ParsedDecls);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D99086.333344.patch
Type: text/x-patch
Size: 4146 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20210325/edb335cf/attachment.bin>


More information about the cfe-commits mailing list