[PATCH] D99086: [clang][Syntax] Optimize expandedTokens for token ranges.

Utkarsh Saxena via Phabricator via cfe-commits cfe-commits at lists.llvm.org
Thu Mar 25 10:54:42 PDT 2021


This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
usaxena95 marked an inline comment as done.
Closed by commit rGaa979084dffb: [clang][Syntax] Optimize expandedTokens for token ranges. (authored by usaxena95).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99086/new/

https://reviews.llvm.org/D99086

Files:
  clang-tools-extra/clangd/ParsedAST.cpp
  clang/include/clang/Tooling/Syntax/Tokens.h
  clang/lib/Tooling/Syntax/Tokens.cpp
  clang/unittests/Tooling/Syntax/TokensTest.cpp


Index: clang/unittests/Tooling/Syntax/TokensTest.cpp
===================================================================
--- clang/unittests/Tooling/Syntax/TokensTest.cpp
+++ clang/unittests/Tooling/Syntax/TokensTest.cpp
@@ -106,6 +106,7 @@
       void EndSourceFileAction() override {
         assert(Collector && "BeginSourceFileAction was never called");
         Result = std::move(*Collector).consume();
+        Result.indexExpandedTokens();
       }
 
       std::unique_ptr<ASTConsumer>
Index: clang/lib/Tooling/Syntax/Tokens.cpp
===================================================================
--- clang/lib/Tooling/Syntax/Tokens.cpp
+++ clang/lib/Tooling/Syntax/Tokens.cpp
@@ -183,7 +183,31 @@
   return Text.substr(Begin, length());
 }
 
+void TokenBuffer::indexExpandedTokens() {
+  // No-op if the index is already created.
+  if (!ExpandedTokIndex.empty())
+    return;
+  ExpandedTokIndex.reserve(ExpandedTokens.size());
+  // Index ExpandedTokens for faster lookups by SourceLocation.
+  for (size_t I = 0, E = ExpandedTokens.size(); I != E; ++I)
+    ExpandedTokIndex[ExpandedTokens[I].location()] = I;
+}
+
 llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const {
+  if (!ExpandedTokIndex.empty()) {
+    // Quick lookup if `R` is a token range.
+    // This is a huge win since majority of the users use ranges provided by an
+    // AST. Ranges in AST are token ranges from expanded token stream.
+    const auto B = ExpandedTokIndex.find(R.getBegin());
+    const auto E = ExpandedTokIndex.find(R.getEnd());
+    if (B != ExpandedTokIndex.end() && E != ExpandedTokIndex.end()) {
+      // Add 1 to End to make a half-open range.
+      return {ExpandedTokens.data() + B->getSecond(),
+              ExpandedTokens.data() + E->getSecond() + 1};
+    }
+  }
+  // Slow case. Use `isBeforeInTranslationUnit` to binary search for the
+  // required range.
   return getTokensCovering(expandedTokens(), R, *SourceMgr);
 }
 
Index: clang/include/clang/Tooling/Syntax/Tokens.h
===================================================================
--- clang/include/clang/Tooling/Syntax/Tokens.h
+++ clang/include/clang/Tooling/Syntax/Tokens.h
@@ -34,6 +34,7 @@
 #include "clang/Basic/TokenKinds.h"
 #include "clang/Lex/Token.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Compiler.h"
@@ -192,8 +193,13 @@
     return ExpandedTokens;
   }
 
+  /// Builds a cache to make future calls to expandedToken(SourceRange) faster.
+  /// Creates an index only once. Further calls to it will be no-op.
+  void indexExpandedTokens();
+
   /// Returns the subrange of expandedTokens() corresponding to the closed
   /// token range R.
+  /// Consider calling indexExpandedTokens() before for faster lookups.
   llvm::ArrayRef<syntax::Token> expandedTokens(SourceRange R) const;
 
   /// Returns the subrange of spelled tokens corresponding to AST node spanning
@@ -366,6 +372,8 @@
   /// same stream as 'clang -E' (excluding the preprocessor directives like
   /// #file, etc.).
   std::vector<syntax::Token> ExpandedTokens;
+  // Index of ExpandedTokens for faster lookups by SourceLocation.
+  llvm::DenseMap<SourceLocation, unsigned> ExpandedTokIndex;
   llvm::DenseMap<FileID, MarkedFile> Files;
   // The value is never null, pointer instead of reference to avoid disabling
   // implicit assignment operator.
Index: clang-tools-extra/clangd/ParsedAST.cpp
===================================================================
--- clang-tools-extra/clangd/ParsedAST.cpp
+++ clang-tools-extra/clangd/ParsedAST.cpp
@@ -423,6 +423,8 @@
   // tokens from running the preprocessor inside the checks (only
   // modernize-use-trailing-return-type does that today).
   syntax::TokenBuffer Tokens = std::move(CollectTokens).consume();
+  // Makes SelectionTree build much faster.
+  Tokens.indexExpandedTokens();
   std::vector<Decl *> ParsedDecls = Action->takeTopLevelDecls();
   // AST traversals should exclude the preamble, to avoid performance cliffs.
   Clang->getASTContext().setTraversalScope(ParsedDecls);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D99086.333350.patch
Type: text/x-patch
Size: 4146 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20210325/36abd621/attachment-0001.bin>


More information about the cfe-commits mailing list