[clang] 0058263 - [clang-format] Support of TableGen tokens with unary operator like form, bang operators and numeric literals. (#78996)

via cfe-commits cfe-commits at lists.llvm.org
Tue Jan 30 07:30:41 PST 2024


Author: Hirofumi Nakamura
Date: 2024-01-31T00:30:37+09:00
New Revision: 00582636009d51c5781b9cae8fde858ab3758306

URL: https://github.com/llvm/llvm-project/commit/00582636009d51c5781b9cae8fde858ab3758306
DIFF: https://github.com/llvm/llvm-project/commit/00582636009d51c5781b9cae8fde858ab3758306.diff

LOG: [clang-format] Support of TableGen tokens with unary operator like form, bang operators and numeric literals. (#78996)

Adds support for tokens that have a unary-operator-like form:
- bang operators:  `!name`
- cond operator: `!cond`
- numeric literals: `+1`, `-1`
The cond operator is one of the bang operators, but it is distinguished from the others because it has a very specific syntax.

Added: 
    

Modified: 
    clang/lib/Format/FormatToken.h
    clang/lib/Format/FormatTokenLexer.cpp
    clang/unittests/Format/TokenAnnotatorTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index dede89f260015..bace91b5f99b4 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -148,6 +148,8 @@ namespace format {
   TYPE(StructLBrace)                                                           \
   TYPE(StructRBrace)                                                           \
   TYPE(StructuredBindingLSquare)                                               \
+  TYPE(TableGenBangOperator)                                                   \
+  TYPE(TableGenCondOperator)                                                   \
   TYPE(TableGenMultiLineString)                                                \
   TYPE(TemplateCloser)                                                         \
   TYPE(TemplateOpener)                                                         \

diff  --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 52a55ea23b5f2..d7de09ef0e12a 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -276,13 +276,44 @@ void FormatTokenLexer::tryMergePreviousTokens() {
       return;
     }
   }
-  // TableGen's Multi line string starts with [{
-  if (Style.isTableGen() && tryMergeTokens({tok::l_square, tok::l_brace},
-                                           TT_TableGenMultiLineString)) {
-    // Set again with finalizing. This must never be annotated as other types.
-    Tokens.back()->setFinalizedType(TT_TableGenMultiLineString);
-    Tokens.back()->Tok.setKind(tok::string_literal);
-    return;
+  if (Style.isTableGen()) {
+    // TableGen's Multi line string starts with [{
+    if (tryMergeTokens({tok::l_square, tok::l_brace},
+                       TT_TableGenMultiLineString)) {
+      // Set again with finalizing. This must never be annotated as other types.
+      Tokens.back()->setFinalizedType(TT_TableGenMultiLineString);
+      Tokens.back()->Tok.setKind(tok::string_literal);
+      return;
+    }
+    // TableGen's bang operator is the form !<name>.
+    // !cond is a special case with specific syntax.
+    if (tryMergeTokens({tok::exclaim, tok::identifier},
+                       TT_TableGenBangOperator)) {
+      Tokens.back()->Tok.setKind(tok::identifier);
+      Tokens.back()->Tok.setIdentifierInfo(nullptr);
+      if (Tokens.back()->TokenText == "!cond")
+        Tokens.back()->setFinalizedType(TT_TableGenCondOperator);
+      else
+        Tokens.back()->setFinalizedType(TT_TableGenBangOperator);
+      return;
+    }
+    if (tryMergeTokens({tok::exclaim, tok::kw_if}, TT_TableGenBangOperator)) {
+      // Here, "! if" becomes "!if".  That is, ! captures if even when the space
+      // exists. That is only one possibility in TableGen's syntax.
+      Tokens.back()->Tok.setKind(tok::identifier);
+      Tokens.back()->Tok.setIdentifierInfo(nullptr);
+      Tokens.back()->setFinalizedType(TT_TableGenBangOperator);
+      return;
+    }
+    // +, - with numbers are literals. Not unary operators.
+    if (tryMergeTokens({tok::plus, tok::numeric_constant}, TT_Unknown)) {
+      Tokens.back()->Tok.setKind(tok::numeric_constant);
+      return;
+    }
+    if (tryMergeTokens({tok::minus, tok::numeric_constant}, TT_Unknown)) {
+      Tokens.back()->Tok.setKind(tok::numeric_constant);
+      return;
+    }
   }
 }
 

diff  --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 67678c18963b1..f3e443e8829bd 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -2215,16 +2215,24 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {
   EXPECT_TRUE(Tokens[0]->IsMultiline);
   EXPECT_EQ(Tokens[0]->LastLineColumnWidth, sizeof("   the string. }]") - 1);
 
+  // Numeric literals.
+  Tokens = Annotate("1234");
+  EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
+  Tokens = Annotate("-1");
+  EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
+  Tokens = Annotate("+1234");
+  EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
+  Tokens = Annotate("0b0110");
+  EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
+  Tokens = Annotate("0x1abC");
+  EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
+
   // Identifier tokens. In TableGen, identifiers can begin with a number.
   // In ambiguous cases, the lexer tries to lex it as a number.
   // Even if the try fails, it does not fall back to identifier lexing and
   // regard as an error.
   // The ambiguity is not documented. The result of those tests are based on the
   // implementation of llvm::TGLexer::LexToken.
-  Tokens = Annotate("1234");
-  EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
-  Tokens = Annotate("0x1abC");
-  EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
   // This is invalid syntax of number, but not an identifier.
   Tokens = Annotate("0x1234x");
   EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
@@ -2249,6 +2257,14 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {
   EXPECT_TOKEN(Tokens[6], tok::l_brace, TT_ElseLBrace);
   Tokens = Annotate("defset Foo Def2 = {}");
   EXPECT_TOKEN(Tokens[4], tok::l_brace, TT_FunctionLBrace);
+
+  // Bang Operators.
+  Tokens = Annotate("!foreach");
+  EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenBangOperator);
+  Tokens = Annotate("!if");
+  EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenBangOperator);
+  Tokens = Annotate("!cond");
+  EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenCondOperator);
 }
 
 TEST_F(TokenAnnotatorTest, UnderstandConstructors) {


        


More information about the cfe-commits mailing list