[clang] [clang-format][NFC] Clean up FormatTokenLexer (PR #203825)
via cfe-commits
cfe-commits at lists.llvm.org
Sun Jun 14 22:09:54 PDT 2026
llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-format
Author: owenca (owenca)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/203825.diff
2 Files Affected:
- (modified) clang/lib/Format/FormatTokenLexer.cpp (+13-7)
- (modified) clang/lib/Format/FormatTokenLexer.h (-4)
``````````diff
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 92571c012bdb2..bf26ef04fe88f 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -33,9 +33,7 @@ FormatTokenLexer::FormatTokenLexer(
LangOpts(getFormattingLangOpts(Style)), SourceMgr(SourceMgr), ID(ID),
Style(Style), IdentTable(IdentTable), Keywords(IdentTable),
Encoding(Encoding), Allocator(Allocator), FirstInLineIndex(0),
- FormattingDisabled(false), FormatOffRegex(Style.OneLineFormatOffRegex),
- MacroBlockBeginRegex(Style.MacroBlockBegin),
- MacroBlockEndRegex(Style.MacroBlockEnd), VerilogProtectedBlock(false) {
+ FormattingDisabled(false), VerilogProtectedBlock(false) {
Lex = std::make_unique<Lexer>(ID, SourceMgr.getBufferOrFake(ID), SourceMgr,
LangOpts);
Lex->SetKeepWhitespaceMode(true);
@@ -88,12 +86,14 @@ FormatTokenLexer::FormatTokenLexer(
ArrayRef<FormatToken *> FormatTokenLexer::lex() {
assert(Tokens.empty());
assert(FirstInLineIndex == 0);
+
enum { FO_None, FO_CurrentLine, FO_NextLine } FormatOff = FO_None;
+ llvm::Regex FormatOffRegex(Style.OneLineFormatOffRegex);
do {
Tokens.push_back(getNextToken());
+
auto &Tok = *Tokens.back();
- const auto NewlinesBefore = Tok.NewlinesBefore;
- switch (FormatOff) {
+ switch (const auto NewlinesBefore = Tok.NewlinesBefore; FormatOff) {
case FO_NextLine:
if (NewlinesBefore > 1) {
FormatOff = FO_None;
@@ -125,13 +125,16 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {
}
}
}
+
if (Style.isJavaScript()) {
tryParseJSRegexLiteral();
handleTemplateStrings();
} else if (Style.isTextProto()) {
tryParsePythonComment();
}
+
tryMergePreviousTokens();
+
if (Style.isCSharp()) {
// This needs to come after tokens have been merged so that C#
// string literals are correctly identified.
@@ -140,9 +143,11 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {
handleTableGenMultilineString();
handleTableGenNumericLikeIdentifier();
}
+
if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
FirstInLineIndex = Tokens.size() - 1;
} while (Tokens.back()->isNot(tok::eof));
+
if (Style.InsertNewlineAtEOF) {
auto &TokEOF = *Tokens.back();
if (TokEOF.NewlinesBefore == 0) {
@@ -150,6 +155,7 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {
TokEOF.OriginalColumn = 0;
}
}
+
return Tokens;
}
@@ -1466,9 +1472,9 @@ FormatToken *FormatTokenLexer::getNextToken() {
FormatTok->Tok.setKind(tok::kw_if);
}
} else if (FormatTok->is(tok::identifier)) {
- if (MacroBlockBeginRegex.match(Text))
+ if (llvm::Regex(Style.MacroBlockBegin).match(Text))
FormatTok->setType(TT_MacroBlockBegin);
- else if (MacroBlockEndRegex.match(Text))
+ else if (llvm::Regex(Style.MacroBlockEnd).match(Text))
FormatTok->setType(TT_MacroBlockEnd);
else if (MacrosSkippedByRemoveParentheses.contains(Identifier))
FormatTok->setFinalizedType(TT_FunctionLikeMacro);
diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h
index 9f5b735efe1d0..9c80eaacf1f45 100644
--- a/clang/lib/Format/FormatTokenLexer.h
+++ b/clang/lib/Format/FormatTokenLexer.h
@@ -135,10 +135,6 @@ class FormatTokenLexer {
TemplateNames, TypeNames, VariableTemplates;
bool FormattingDisabled;
- llvm::Regex FormatOffRegex; // For one line.
-
- llvm::Regex MacroBlockBeginRegex;
- llvm::Regex MacroBlockEndRegex;
// The next line is a Verilog protected block that should not be split into
// tokens. Set at the 'pragma protect' line. Cleared at the next line.
``````````
</details>
https://github.com/llvm/llvm-project/pull/203825
More information about the cfe-commits mailing list