[clang] [clang-format] Support of TableGen tokens with unary operator like form, bang operators and numeric literal. (PR #78996)
Hirofumi Nakamura via cfe-commits
cfe-commits at lists.llvm.org
Mon Jan 22 07:56:58 PST 2024
https://github.com/hnakamura5 created https://github.com/llvm/llvm-project/pull/78996
Adds support for TableGen tokens whose form resembles a unary operator applied to an operand:
- bang operators: `!name`
- cond operator: `!cond`
- numeric literals: `+1`, `-1`
The cond operator is one of the bang operators, but it is distinguished from the others because it has a very specific syntax.
From af522a6ac1a2620408ec2933261ad9d17066ddff Mon Sep 17 00:00:00 2001
From: hnakamura5 <k.nakamura.hirofumi at gmail.com>
Date: Tue, 23 Jan 2024 00:50:17 +0900
Subject: [PATCH] [clang-format] Support of TableGen tokens with unary operator
like form, bang operators and numeric literal.
---
clang/lib/Format/FormatToken.h | 2 +
clang/lib/Format/FormatTokenLexer.cpp | 45 ++++++++++++++++---
clang/unittests/Format/TokenAnnotatorTest.cpp | 24 ++++++++--
3 files changed, 60 insertions(+), 11 deletions(-)
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index dede89f2600150..bace91b5f99b4d 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -148,6 +148,8 @@ namespace format {
TYPE(StructLBrace) \
TYPE(StructRBrace) \
TYPE(StructuredBindingLSquare) \
+ TYPE(TableGenBangOperator) \
+ TYPE(TableGenCondOperator) \
TYPE(TableGenMultiLineString) \
TYPE(TemplateCloser) \
TYPE(TemplateOpener) \
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 52a55ea23b5f2f..d7de09ef0e12ab 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -276,13 +276,44 @@ void FormatTokenLexer::tryMergePreviousTokens() {
return;
}
}
- // TableGen's Multi line string starts with [{
- if (Style.isTableGen() && tryMergeTokens({tok::l_square, tok::l_brace},
- TT_TableGenMultiLineString)) {
- // Set again with finalizing. This must never be annotated as other types.
- Tokens.back()->setFinalizedType(TT_TableGenMultiLineString);
- Tokens.back()->Tok.setKind(tok::string_literal);
- return;
+ if (Style.isTableGen()) {
+ // TableGen's Multi line string starts with [{
+ if (tryMergeTokens({tok::l_square, tok::l_brace},
+ TT_TableGenMultiLineString)) {
+ // Set again with finalizing. This must never be annotated as other types.
+ Tokens.back()->setFinalizedType(TT_TableGenMultiLineString);
+ Tokens.back()->Tok.setKind(tok::string_literal);
+ return;
+ }
+ // TableGen's bang operator is the form !<name>.
+ // !cond is a special case with specific syntax.
+ if (tryMergeTokens({tok::exclaim, tok::identifier},
+ TT_TableGenBangOperator)) {
+ Tokens.back()->Tok.setKind(tok::identifier);
+ Tokens.back()->Tok.setIdentifierInfo(nullptr);
+ if (Tokens.back()->TokenText == "!cond")
+ Tokens.back()->setFinalizedType(TT_TableGenCondOperator);
+ else
+ Tokens.back()->setFinalizedType(TT_TableGenBangOperator);
+ return;
+ }
+ if (tryMergeTokens({tok::exclaim, tok::kw_if}, TT_TableGenBangOperator)) {
+ // Here, "! if" becomes "!if". That is, ! captures if even when the space
+ // exists. That is only one possibility in TableGen's syntax.
+ Tokens.back()->Tok.setKind(tok::identifier);
+ Tokens.back()->Tok.setIdentifierInfo(nullptr);
+ Tokens.back()->setFinalizedType(TT_TableGenBangOperator);
+ return;
+ }
+ // +, - with numbers are literals. Not unary operators.
+ if (tryMergeTokens({tok::plus, tok::numeric_constant}, TT_Unknown)) {
+ Tokens.back()->Tok.setKind(tok::numeric_constant);
+ return;
+ }
+ if (tryMergeTokens({tok::minus, tok::numeric_constant}, TT_Unknown)) {
+ Tokens.back()->Tok.setKind(tok::numeric_constant);
+ return;
+ }
}
}
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 3dbf504c35ed55..cb93930e0fc3bc 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -2210,16 +2210,24 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {
EXPECT_TRUE(Tokens[0]->IsMultiline);
EXPECT_EQ(Tokens[0]->LastLineColumnWidth, sizeof(" the string. }]") - 1);
+ // Numeric literals.
+ Tokens = Annotate("1234");
+ EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
+ Tokens = Annotate("-1");
+ EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
+ Tokens = Annotate("+1234");
+ EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
+ Tokens = Annotate("0b0110");
+ EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
+ Tokens = Annotate("0x1abC");
+ EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
+
// Identifier tokens. In TableGen, identifiers can begin with a number.
// In ambiguous cases, the lexer tries to lex it as a number.
// Even if the try fails, it does not fall back to identifier lexing and
// regard as an error.
// The ambiguity is not documented. The result of those tests are based on the
// implementation of llvm::TGLexer::LexToken.
- Tokens = Annotate("1234");
- EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
- Tokens = Annotate("0x1abC");
- EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
// This is invalid syntax of number, but not an identifier.
Tokens = Annotate("0x1234x");
EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
@@ -2244,6 +2252,14 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {
EXPECT_TOKEN(Tokens[6], tok::l_brace, TT_ElseLBrace);
Tokens = Annotate("defset Foo Def2 = {}");
EXPECT_TOKEN(Tokens[4], tok::l_brace, TT_FunctionLBrace);
+
+ // Bang Operators.
+ Tokens = Annotate("!foreach");
+ EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenBangOperator);
+ Tokens = Annotate("!if");
+ EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenBangOperator);
+ Tokens = Annotate("!cond");
+ EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenCondOperator);
}
TEST_F(TokenAnnotatorTest, UnderstandConstructors) {
More information about the cfe-commits
mailing list