[clang] 0058263 - [clang-format] Support of TableGen tokens with unary operator like form, bang operators and numeric literals. (#78996)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Jan 30 07:30:41 PST 2024
Author: Hirofumi Nakamura
Date: 2024-01-31T00:30:37+09:00
New Revision: 00582636009d51c5781b9cae8fde858ab3758306
URL: https://github.com/llvm/llvm-project/commit/00582636009d51c5781b9cae8fde858ab3758306
DIFF: https://github.com/llvm/llvm-project/commit/00582636009d51c5781b9cae8fde858ab3758306.diff
LOG: [clang-format] Support of TableGen tokens with unary operator like form, bang operators and numeric literals. (#78996)
Adds support for TableGen tokens whose form resembles a unary operator applied to an operand.
- bang operators: `!name`
- cond operator: `!cond`
- numeric literals: `+1`, `-1`
The cond operator is one of the bang operators, but it is distinguished from the others because it has a very specific syntax.
Added:
Modified:
clang/lib/Format/FormatToken.h
clang/lib/Format/FormatTokenLexer.cpp
clang/unittests/Format/TokenAnnotatorTest.cpp
Removed:
################################################################################
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index dede89f260015..bace91b5f99b4 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -148,6 +148,8 @@ namespace format {
TYPE(StructLBrace) \
TYPE(StructRBrace) \
TYPE(StructuredBindingLSquare) \
+ TYPE(TableGenBangOperator) \
+ TYPE(TableGenCondOperator) \
TYPE(TableGenMultiLineString) \
TYPE(TemplateCloser) \
TYPE(TemplateOpener) \
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 52a55ea23b5f2..d7de09ef0e12a 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -276,13 +276,44 @@ void FormatTokenLexer::tryMergePreviousTokens() {
return;
}
}
- // TableGen's Multi line string starts with [{
- if (Style.isTableGen() && tryMergeTokens({tok::l_square, tok::l_brace},
- TT_TableGenMultiLineString)) {
- // Set again with finalizing. This must never be annotated as other types.
- Tokens.back()->setFinalizedType(TT_TableGenMultiLineString);
- Tokens.back()->Tok.setKind(tok::string_literal);
- return;
+ if (Style.isTableGen()) {
+ // TableGen's Multi line string starts with [{
+ if (tryMergeTokens({tok::l_square, tok::l_brace},
+ TT_TableGenMultiLineString)) {
+ // Set again with finalizing. This must never be annotated as other types.
+ Tokens.back()->setFinalizedType(TT_TableGenMultiLineString);
+ Tokens.back()->Tok.setKind(tok::string_literal);
+ return;
+ }
+ // TableGen's bang operator is the form !<name>.
+ // !cond is a special case with specific syntax.
+ if (tryMergeTokens({tok::exclaim, tok::identifier},
+ TT_TableGenBangOperator)) {
+ Tokens.back()->Tok.setKind(tok::identifier);
+ Tokens.back()->Tok.setIdentifierInfo(nullptr);
+ if (Tokens.back()->TokenText == "!cond")
+ Tokens.back()->setFinalizedType(TT_TableGenCondOperator);
+ else
+ Tokens.back()->setFinalizedType(TT_TableGenBangOperator);
+ return;
+ }
+ if (tryMergeTokens({tok::exclaim, tok::kw_if}, TT_TableGenBangOperator)) {
+ // Here, "! if" becomes "!if". That is, ! captures if even when the space
+ // exists. That is only one possibility in TableGen's syntax.
+ Tokens.back()->Tok.setKind(tok::identifier);
+ Tokens.back()->Tok.setIdentifierInfo(nullptr);
+ Tokens.back()->setFinalizedType(TT_TableGenBangOperator);
+ return;
+ }
+ // +, - with numbers are literals. Not unary operators.
+ if (tryMergeTokens({tok::plus, tok::numeric_constant}, TT_Unknown)) {
+ Tokens.back()->Tok.setKind(tok::numeric_constant);
+ return;
+ }
+ if (tryMergeTokens({tok::minus, tok::numeric_constant}, TT_Unknown)) {
+ Tokens.back()->Tok.setKind(tok::numeric_constant);
+ return;
+ }
}
}
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 67678c18963b1..f3e443e8829bd 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -2215,16 +2215,24 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {
EXPECT_TRUE(Tokens[0]->IsMultiline);
EXPECT_EQ(Tokens[0]->LastLineColumnWidth, sizeof(" the string. }]") - 1);
+ // Numeric literals.
+ Tokens = Annotate("1234");
+ EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
+ Tokens = Annotate("-1");
+ EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
+ Tokens = Annotate("+1234");
+ EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
+ Tokens = Annotate("0b0110");
+ EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
+ Tokens = Annotate("0x1abC");
+ EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
+
// Identifier tokens. In TableGen, identifiers can begin with a number.
// In ambiguous cases, the lexer tries to lex it as a number.
// Even if the try fails, it does not fall back to identifier lexing and
// regard as an error.
// The ambiguity is not documented. The result of those tests are based on the
// implementation of llvm::TGLexer::LexToken.
- Tokens = Annotate("1234");
- EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
- Tokens = Annotate("0x1abC");
- EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
// This is invalid syntax of number, but not an identifier.
Tokens = Annotate("0x1234x");
EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
@@ -2249,6 +2257,14 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {
EXPECT_TOKEN(Tokens[6], tok::l_brace, TT_ElseLBrace);
Tokens = Annotate("defset Foo Def2 = {}");
EXPECT_TOKEN(Tokens[4], tok::l_brace, TT_FunctionLBrace);
+
+ // Bang Operators.
+ Tokens = Annotate("!foreach");
+ EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenBangOperator);
+ Tokens = Annotate("!if");
+ EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenBangOperator);
+ Tokens = Annotate("!cond");
+ EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenCondOperator);
}
TEST_F(TokenAnnotatorTest, UnderstandConstructors) {
More information about the cfe-commits
mailing list