[clang-tools-extra] 98bb094 - [clangd] Use tokenize instead of raw lexer in SourceCode/lex
Kadir Cetinkaya via cfe-commits
cfe-commits at lists.llvm.org
Fri Feb 28 00:38:37 PST 2020
Author: Kadir Cetinkaya
Date: 2020-02-28T09:38:26+01:00
New Revision: 98bb094c1e0580833a79d60dbc853bd1a43aa1a6
URL: https://github.com/llvm/llvm-project/commit/98bb094c1e0580833a79d60dbc853bd1a43aa1a6
DIFF: https://github.com/llvm/llvm-project/commit/98bb094c1e0580833a79d60dbc853bd1a43aa1a6.diff
LOG: [clangd] Use tokenize instead of raw lexer in SourceCode/lex
Reviewers: hokein, sammccall
Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, usaxena95, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D75249
Added:
Modified:
clang-tools-extra/clangd/SourceCode.cpp
Removed:
################################################################################
diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp
index 2b0a857228bb..3feddd1df24b 100644
--- a/clang-tools-extra/clangd/SourceCode.cpp
+++ b/clang-tools-extra/clangd/SourceCode.cpp
@@ -23,6 +23,7 @@
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/Token.h"
#include "clang/Tooling/Core/Replacement.h"
+#include "clang/Tooling/Syntax/Tokens.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
@@ -612,31 +613,26 @@ cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces,
static void
lex(llvm::StringRef Code, const LangOptions &LangOpts,
- llvm::function_ref<void(const clang::Token &, const SourceManager &SM)>
+ llvm::function_ref<void(const syntax::Token &, const SourceManager &SM)>
Action) {
// FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated!
std::string NullTerminatedCode = Code.str();
SourceManagerForFile FileSM("dummy.cpp", NullTerminatedCode);
auto &SM = FileSM.get();
- auto FID = SM.getMainFileID();
- // Create a raw lexer (with no associated preprocessor object).
- Lexer Lex(FID, SM.getBuffer(FID), SM, LangOpts);
- Token Tok;
-
- while (!Lex.LexFromRawLexer(Tok))
+ for (const auto &Tok : syntax::tokenize(SM.getMainFileID(), SM, LangOpts))
Action(Tok, SM);
- // LexFromRawLexer returns true after it lexes last token, so we still have
- // one more token to report.
- Action(Tok, SM);
}
llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content,
const format::FormatStyle &Style) {
llvm::StringMap<unsigned> Identifiers;
auto LangOpt = format::getFormattingLangOpts(Style);
- lex(Content, LangOpt, [&](const clang::Token &Tok, const SourceManager &) {
- if (Tok.getKind() == tok::raw_identifier)
- ++Identifiers[Tok.getRawIdentifier()];
+ lex(Content, LangOpt, [&](const syntax::Token &Tok, const SourceManager &SM) {
+ if (Tok.kind() == tok::identifier)
+ ++Identifiers[Tok.text(SM)];
+ // FIXME: Should this function really return keywords too ?
+ else if (const auto *Keyword = tok::getKeywordSpelling(Tok.kind()))
+ ++Identifiers[Keyword];
});
return Identifiers;
}
@@ -645,16 +641,13 @@ std::vector<Range> collectIdentifierRanges(llvm::StringRef Identifier,
llvm::StringRef Content,
const LangOptions &LangOpts) {
std::vector<Range> Ranges;
- lex(Content, LangOpts, [&](const clang::Token &Tok, const SourceManager &SM) {
- if (Tok.getKind() != tok::raw_identifier)
- return;
- if (Tok.getRawIdentifier() != Identifier)
- return;
- auto Range = getTokenRange(SM, LangOpts, Tok.getLocation());
- if (!Range)
- return;
- Ranges.push_back(*Range);
- });
+ lex(Content, LangOpts,
+ [&](const syntax::Token &Tok, const SourceManager &SM) {
+ if (Tok.kind() != tok::identifier || Tok.text(SM) != Identifier)
+ return;
+ if (auto Range = getTokenRange(SM, LangOpts, Tok.location()))
+ Ranges.push_back(*Range);
+ });
return Ranges;
}
@@ -691,97 +684,113 @@ void parseNamespaceEvents(llvm::StringRef Code,
NamespaceEvent Event;
lex(Code, format::getFormattingLangOpts(Style),
- [&](const clang::Token &Tok,const SourceManager &SM) {
- Event.Pos = sourceLocToPosition(SM, Tok.getLocation());
- switch (Tok.getKind()) {
- case tok::raw_identifier:
- // In raw mode, this could be a keyword or a name.
- switch (State) {
- case UsingNamespace:
- case UsingNamespaceName:
- NSName.append(std::string(Tok.getRawIdentifier()));
- State = UsingNamespaceName;
- break;
- case Namespace:
- case NamespaceName:
- NSName.append(std::string(Tok.getRawIdentifier()));
- State = NamespaceName;
- break;
- case Using:
- State =
- (Tok.getRawIdentifier() == "namespace") ? UsingNamespace : Default;
- break;
- case Default:
- NSName.clear();
- if (Tok.getRawIdentifier() == "namespace")
- State = Namespace;
- else if (Tok.getRawIdentifier() == "using")
- State = Using;
- break;
- }
- break;
- case tok::coloncolon:
- // This can come at the beginning or in the middle of a namespace name.
- switch (State) {
- case UsingNamespace:
- case UsingNamespaceName:
- NSName.append("::");
- State = UsingNamespaceName;
- break;
- case NamespaceName:
- NSName.append("::");
- State = NamespaceName;
- break;
- case Namespace: // Not legal here.
- case Using:
- case Default:
- State = Default;
- break;
- }
- break;
- case tok::l_brace:
- // Record which { started a namespace, so we know when } ends one.
- if (State == NamespaceName) {
- // Parsed: namespace <name> {
- BraceStack.push_back(true);
- Enclosing.push_back(NSName);
- Event.Trigger = NamespaceEvent::BeginNamespace;
- Event.Payload = llvm::join(Enclosing, "::");
- Callback(Event);
- } else {
- // This case includes anonymous namespaces (State = Namespace).
- // For our purposes, they're not namespaces and we ignore them.
- BraceStack.push_back(false);
- }
- State = Default;
- break;
- case tok::r_brace:
- // If braces are unmatched, we're going to be confused, but don't crash.
- if (!BraceStack.empty()) {
- if (BraceStack.back()) {
- // Parsed: } // namespace
- Enclosing.pop_back();
- Event.Trigger = NamespaceEvent::EndNamespace;
- Event.Payload = llvm::join(Enclosing, "::");
- Callback(Event);
+ [&](const syntax::Token &Tok, const SourceManager &SM) {
+ Event.Pos = sourceLocToPosition(SM, Tok.location());
+ switch (Tok.kind()) {
+ case tok::kw_using:
+ State = State == Default ? Using : Default;
+ break;
+ case tok::kw_namespace:
+ switch (State) {
+ case Using:
+ State = UsingNamespace;
+ break;
+ case Default:
+ State = Namespace;
+ break;
+ default:
+ State = Default;
+ break;
+ }
+ break;
+ case tok::identifier:
+ switch (State) {
+ case UsingNamespace:
+ NSName.clear();
+ LLVM_FALLTHROUGH;
+ case UsingNamespaceName:
+ NSName.append(Tok.text(SM).str());
+ State = UsingNamespaceName;
+ break;
+ case Namespace:
+ NSName.clear();
+ LLVM_FALLTHROUGH;
+ case NamespaceName:
+ NSName.append(Tok.text(SM).str());
+ State = NamespaceName;
+ break;
+ case Using:
+ case Default:
+ State = Default;
+ break;
+ }
+ break;
+ case tok::coloncolon:
+ // This can come at the beginning or in the middle of a namespace
+ // name.
+ switch (State) {
+ case UsingNamespace:
+ NSName.clear();
+ LLVM_FALLTHROUGH;
+ case UsingNamespaceName:
+ NSName.append("::");
+ State = UsingNamespaceName;
+ break;
+ case NamespaceName:
+ NSName.append("::");
+ State = NamespaceName;
+ break;
+ case Namespace: // Not legal here.
+ case Using:
+ case Default:
+ State = Default;
+ break;
+ }
+ break;
+ case tok::l_brace:
+ // Record which { started a namespace, so we know when } ends one.
+ if (State == NamespaceName) {
+ // Parsed: namespace <name> {
+ BraceStack.push_back(true);
+ Enclosing.push_back(NSName);
+ Event.Trigger = NamespaceEvent::BeginNamespace;
+ Event.Payload = llvm::join(Enclosing, "::");
+ Callback(Event);
+ } else {
+ // This case includes anonymous namespaces (State = Namespace).
+ // For our purposes, they're not namespaces and we ignore them.
+ BraceStack.push_back(false);
+ }
+ State = Default;
+ break;
+ case tok::r_brace:
+ // If braces are unmatched, we're going to be confused, but don't
+ // crash.
+ if (!BraceStack.empty()) {
+ if (BraceStack.back()) {
+ // Parsed: } // namespace
+ Enclosing.pop_back();
+ Event.Trigger = NamespaceEvent::EndNamespace;
+ Event.Payload = llvm::join(Enclosing, "::");
+ Callback(Event);
+ }
+ BraceStack.pop_back();
+ }
+ break;
+ case tok::semi:
+ if (State == UsingNamespaceName) {
+ // Parsed: using namespace <name> ;
+ Event.Trigger = NamespaceEvent::UsingDirective;
+ Event.Payload = std::move(NSName);
+ Callback(Event);
+ }
+ State = Default;
+ break;
+ default:
+ State = Default;
+ break;
}
- BraceStack.pop_back();
- }
- break;
- case tok::semi:
- if (State == UsingNamespaceName) {
- // Parsed: using namespace <name> ;
- Event.Trigger = NamespaceEvent::UsingDirective;
- Event.Payload = std::move(NSName);
- Callback(Event);
- }
- State = Default;
- break;
- default:
- State = Default;
- break;
- }
- });
+ });
}
// Returns the prefix namespaces of NS: {"" ... NS}.
More information about the cfe-commits mailing list