[clang] [clang-format] TableGen multi line string support. (PR #78032)
via cfe-commits
cfe-commits at lists.llvm.org
Sat Jan 13 04:54:31 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-format
Author: Hirofumi Nakamura (hnakamura5)
<details>
<summary>Changes</summary>
Add support for TableGen's multi-line string (code) literals.
Such a literal has the form:
[{ this is the string possibly with multi line... }]
This change was split out of https://github.com/llvm/llvm-project/pull/76059.
---
Full diff: https://github.com/llvm/llvm-project/pull/78032.diff
6 Files Affected:
- (modified) clang/lib/Format/ContinuationIndenter.cpp (+3)
- (modified) clang/lib/Format/FormatToken.h (+1)
- (modified) clang/lib/Format/FormatTokenLexer.cpp (+57)
- (modified) clang/lib/Format/FormatTokenLexer.h (+3)
- (modified) clang/lib/Format/TokenAnnotator.cpp (+1-1)
- (modified) clang/unittests/Format/TokenAnnotatorTest.cpp (+5)
``````````diff
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index 102504182c4505..e6eaaa9ab45706 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -1591,6 +1591,9 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
State.StartOfStringLiteral = State.Column + 1;
if (Current.is(TT_CSharpStringLiteral) && State.StartOfStringLiteral == 0) {
State.StartOfStringLiteral = State.Column + 1;
+ } else if (Current.is(TT_TableGenMultiLineString) &&
+ State.StartOfStringLiteral == 0) {
+ State.StartOfStringLiteral = State.Column + 1;
} else if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) {
State.StartOfStringLiteral = State.Column;
} else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) &&
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index d5ef627f1348d3..dede89f2600150 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -148,6 +148,7 @@ namespace format {
TYPE(StructLBrace) \
TYPE(StructRBrace) \
TYPE(StructuredBindingLSquare) \
+ TYPE(TableGenMultiLineString) \
TYPE(TemplateCloser) \
TYPE(TemplateOpener) \
TYPE(TemplateString) \
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index a1fd6dd6effe6c..1060009bdcf131 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -93,6 +93,8 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {
// string literals are correctly identified.
handleCSharpVerbatimAndInterpolatedStrings();
}
+ if (Style.isTableGen())
+ handleTableGenMultilineString();
if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
FirstInLineIndex = Tokens.size() - 1;
} while (Tokens.back()->isNot(tok::eof));
@@ -272,6 +274,14 @@ void FormatTokenLexer::tryMergePreviousTokens() {
return;
}
}
+ if (Style.isTableGen()) {
+ if (tryMergeTokens({tok::l_square, tok::l_brace},
+ TT_TableGenMultiLineString)) {
+ // Multi line string starts with [{
+ Tokens.back()->Tok.setKind(tok::string_literal);
+ return;
+ }
+ }
}
bool FormatTokenLexer::tryMergeNSStringLiteral() {
@@ -763,6 +773,53 @@ void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() {
resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));
}
+void FormatTokenLexer::handleTableGenMultilineString() {
+ FormatToken *MultiLineString = Tokens.back();
+ if (MultiLineString->isNot(TT_TableGenMultiLineString))
+ return;
+
+ bool PrevIsRBrace = false;
+ const char *FirstBreak = nullptr;
+ const char *LastBreak = nullptr;
+ const char *Begin = MultiLineString->TokenText.begin();
+ // Skip until }], the closer of multi line string found.
+ for (const char *Current = Begin, *End = Lex->getBuffer().end();
+ Current != End; ++Current) {
+ if (PrevIsRBrace && *Current == ']') {
+ // }] is the end of multi line string.
+ if (!FirstBreak)
+ FirstBreak = Current;
+ MultiLineString->TokenText = StringRef(Begin, Current - Begin + 1);
+ // ColumnWidth is only the width of the first line.
+ MultiLineString->ColumnWidth = encoding::columnWidthWithTabs(
+ StringRef(Begin, FirstBreak - Begin + 1),
+ MultiLineString->OriginalColumn, Style.TabWidth, Encoding);
+ if (LastBreak) {
+ // Set LastLineColumnWidth if multi line string has multiple lines.
+ MultiLineString->LastLineColumnWidth = encoding::columnWidthWithTabs(
+ StringRef(LastBreak + 1, Current - LastBreak),
+ MultiLineString->OriginalColumn, Style.TabWidth, Encoding);
+ }
+ resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Current + 1)));
+ return;
+ }
+ PrevIsRBrace = false;
+ if (*Current == '\n') {
+ MultiLineString->IsMultiline = true;
+ // Assure LastBreak is not equal to FirstBreak.
+ if (!FirstBreak)
+ FirstBreak = Current;
+ LastBreak = Current;
+ continue;
+ }
+ if (*Current == '}') {
+ // Memorize '}'. If next character is ']', they are the closer.
+ PrevIsRBrace = true;
+ continue;
+ }
+ }
+}
+
void FormatTokenLexer::handleTemplateStrings() {
FormatToken *BacktickToken = Tokens.back();
diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h
index bb6a8ab69c1be1..1dec6bbc41514c 100644
--- a/clang/lib/Format/FormatTokenLexer.h
+++ b/clang/lib/Format/FormatTokenLexer.h
@@ -95,6 +95,9 @@ class FormatTokenLexer {
void handleCSharpVerbatimAndInterpolatedStrings();
+ // Handles TableGen multiline strings. It has the form [{ ... }].
+ void handleTableGenMultilineString();
+
void tryParsePythonComment();
bool tryMerge_TMacro();
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 24ce18a64348c1..661118970336a2 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1710,7 +1710,7 @@ class AnnotatingParser {
TT_UnionLBrace, TT_RequiresClause,
TT_RequiresClauseInARequiresExpression, TT_RequiresExpression,
TT_RequiresExpressionLParen, TT_RequiresExpressionLBrace,
- TT_BracedListLBrace)) {
+ TT_BracedListLBrace, TT_TableGenMultiLineString)) {
CurrentToken->setType(TT_Unknown);
}
CurrentToken->Role.reset();
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 92f57a77cdaf01..5ca6a76f840bdf 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -2193,6 +2193,11 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {
ASSERT_TRUE(Keywords.isTableGenDefinition(*Tokens[0]));
ASSERT_TRUE(Tokens[0]->is(Keywords.kw_def));
ASSERT_TRUE(Tokens[1]->is(TT_StartOfName));
+
+ // Code, the multiline string token.
+ Tokens = Annotate("[{ code is multiline string }]");
+ ASSERT_EQ(Tokens.size(), 2u) << Tokens;
+ EXPECT_TOKEN(Tokens[0], tok::string_literal, TT_TableGenMultiLineString);
}
TEST_F(TokenAnnotatorTest, UnderstandConstructors) {
``````````
</details>
https://github.com/llvm/llvm-project/pull/78032
More information about the cfe-commits
mailing list