[clang] [clang-format] Support of TableGen tokens with unary operator like form, bang operators and numeric literals. (PR #78996)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Jan 22 07:57:32 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-format
Author: Hirofumi Nakamura (hnakamura5)
<details>
<summary>Changes</summary>
Adds support for tokens whose form resembles a unary operator applied to an operand:
- bang operators: `!name`
- cond operator: `!cond`
- numeric literals: `+1`, `-1`
The cond operator is one of the bang operators, but it is distinguished from the others because it has a very specific syntax.
---
Full diff: https://github.com/llvm/llvm-project/pull/78996.diff
3 Files Affected:
- (modified) clang/lib/Format/FormatToken.h (+2)
- (modified) clang/lib/Format/FormatTokenLexer.cpp (+38-7)
- (modified) clang/unittests/Format/TokenAnnotatorTest.cpp (+20-4)
``````````diff
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index dede89f2600150f..bace91b5f99b4df 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -148,6 +148,8 @@ namespace format {
TYPE(StructLBrace) \
TYPE(StructRBrace) \
TYPE(StructuredBindingLSquare) \
+ TYPE(TableGenBangOperator) \
+ TYPE(TableGenCondOperator) \
TYPE(TableGenMultiLineString) \
TYPE(TemplateCloser) \
TYPE(TemplateOpener) \
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 52a55ea23b5f2f7..d7de09ef0e12ab6 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -276,13 +276,44 @@ void FormatTokenLexer::tryMergePreviousTokens() {
return;
}
}
- // TableGen's Multi line string starts with [{
- if (Style.isTableGen() && tryMergeTokens({tok::l_square, tok::l_brace},
- TT_TableGenMultiLineString)) {
- // Set again with finalizing. This must never be annotated as other types.
- Tokens.back()->setFinalizedType(TT_TableGenMultiLineString);
- Tokens.back()->Tok.setKind(tok::string_literal);
- return;
+ if (Style.isTableGen()) {
+ // TableGen's Multi line string starts with [{
+ if (tryMergeTokens({tok::l_square, tok::l_brace},
+ TT_TableGenMultiLineString)) {
+ // Set again with finalizing. This must never be annotated as other types.
+ Tokens.back()->setFinalizedType(TT_TableGenMultiLineString);
+ Tokens.back()->Tok.setKind(tok::string_literal);
+ return;
+ }
+ // TableGen's bang operator is the form !<name>.
+ // !cond is a special case with specific syntax.
+ if (tryMergeTokens({tok::exclaim, tok::identifier},
+ TT_TableGenBangOperator)) {
+ Tokens.back()->Tok.setKind(tok::identifier);
+ Tokens.back()->Tok.setIdentifierInfo(nullptr);
+ if (Tokens.back()->TokenText == "!cond")
+ Tokens.back()->setFinalizedType(TT_TableGenCondOperator);
+ else
+ Tokens.back()->setFinalizedType(TT_TableGenBangOperator);
+ return;
+ }
+ if (tryMergeTokens({tok::exclaim, tok::kw_if}, TT_TableGenBangOperator)) {
+ // Here, "! if" becomes "!if". That is, ! captures if even when the space
+ // exists. That is only one possibility in TableGen's syntax.
+ Tokens.back()->Tok.setKind(tok::identifier);
+ Tokens.back()->Tok.setIdentifierInfo(nullptr);
+ Tokens.back()->setFinalizedType(TT_TableGenBangOperator);
+ return;
+ }
+ // +, - with numbers are literals. Not unary operators.
+ if (tryMergeTokens({tok::plus, tok::numeric_constant}, TT_Unknown)) {
+ Tokens.back()->Tok.setKind(tok::numeric_constant);
+ return;
+ }
+ if (tryMergeTokens({tok::minus, tok::numeric_constant}, TT_Unknown)) {
+ Tokens.back()->Tok.setKind(tok::numeric_constant);
+ return;
+ }
}
}
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 3dbf504c35ed55e..cb93930e0fc3bc8 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -2210,16 +2210,24 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {
EXPECT_TRUE(Tokens[0]->IsMultiline);
EXPECT_EQ(Tokens[0]->LastLineColumnWidth, sizeof(" the string. }]") - 1);
+ // Numeric literals.
+ Tokens = Annotate("1234");
+ EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
+ Tokens = Annotate("-1");
+ EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
+ Tokens = Annotate("+1234");
+ EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
+ Tokens = Annotate("0b0110");
+ EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
+ Tokens = Annotate("0x1abC");
+ EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
+
// Identifier tokens. In TableGen, identifiers can begin with a number.
// In ambiguous cases, the lexer tries to lex it as a number.
// Even if the try fails, it does not fall back to identifier lexing and
// regard as an error.
// The ambiguity is not documented. The result of those tests are based on the
// implementation of llvm::TGLexer::LexToken.
- Tokens = Annotate("1234");
- EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
- Tokens = Annotate("0x1abC");
- EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
// This is invalid syntax of number, but not an identifier.
Tokens = Annotate("0x1234x");
EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
@@ -2244,6 +2252,14 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {
EXPECT_TOKEN(Tokens[6], tok::l_brace, TT_ElseLBrace);
Tokens = Annotate("defset Foo Def2 = {}");
EXPECT_TOKEN(Tokens[4], tok::l_brace, TT_FunctionLBrace);
+
+ // Bang Operators.
+ Tokens = Annotate("!foreach");
+ EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenBangOperator);
+ Tokens = Annotate("!if");
+ EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenBangOperator);
+ Tokens = Annotate("!cond");
+ EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenCondOperator);
}
TEST_F(TokenAnnotatorTest, UnderstandConstructors) {
``````````
</details>
https://github.com/llvm/llvm-project/pull/78996
More information about the cfe-commits
mailing list