[clang] [clang-format] Support of TableGen tokens with unary operator like form, bang operators and numeric literal. (PR #78996)

Hirofumi Nakamura via cfe-commits cfe-commits at lists.llvm.org
Mon Jan 22 07:56:58 PST 2024


https://github.com/hnakamura5 created https://github.com/llvm/llvm-project/pull/78996

Adds support for TableGen tokens whose form resembles a unary operator applied to an operand:
- bang operators:  `!name`
- cond operator: `!cond`
- numeric literals: `+1`, `-1`
The cond operator is one of the bang operators, but it is distinguished from the others because it has a very specific syntax.

>From af522a6ac1a2620408ec2933261ad9d17066ddff Mon Sep 17 00:00:00 2001
From: hnakamura5 <k.nakamura.hirofumi at gmail.com>
Date: Tue, 23 Jan 2024 00:50:17 +0900
Subject: [PATCH] [clang-format] Support of TableGen tokens with unary operator
 like form, bang operators and numeric literal.

---
 clang/lib/Format/FormatToken.h                |  2 +
 clang/lib/Format/FormatTokenLexer.cpp         | 45 ++++++++++++++++---
 clang/unittests/Format/TokenAnnotatorTest.cpp | 24 ++++++++--
 3 files changed, 60 insertions(+), 11 deletions(-)

diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index dede89f2600150..bace91b5f99b4d 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -148,6 +148,8 @@ namespace format {
   TYPE(StructLBrace)                                                           \
   TYPE(StructRBrace)                                                           \
   TYPE(StructuredBindingLSquare)                                               \
+  TYPE(TableGenBangOperator)                                                   \
+  TYPE(TableGenCondOperator)                                                   \
   TYPE(TableGenMultiLineString)                                                \
   TYPE(TemplateCloser)                                                         \
   TYPE(TemplateOpener)                                                         \
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 52a55ea23b5f2f..d7de09ef0e12ab 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -276,13 +276,44 @@ void FormatTokenLexer::tryMergePreviousTokens() {
       return;
     }
   }
-  // TableGen's Multi line string starts with [{
-  if (Style.isTableGen() && tryMergeTokens({tok::l_square, tok::l_brace},
-                                           TT_TableGenMultiLineString)) {
-    // Set again with finalizing. This must never be annotated as other types.
-    Tokens.back()->setFinalizedType(TT_TableGenMultiLineString);
-    Tokens.back()->Tok.setKind(tok::string_literal);
-    return;
+  if (Style.isTableGen()) {
+    // TableGen's Multi line string starts with [{
+    if (tryMergeTokens({tok::l_square, tok::l_brace},
+                       TT_TableGenMultiLineString)) {
+      // Set again with finalizing. This must never be annotated as other types.
+      Tokens.back()->setFinalizedType(TT_TableGenMultiLineString);
+      Tokens.back()->Tok.setKind(tok::string_literal);
+      return;
+    }
+    // TableGen's bang operator is the form !<name>.
+    // !cond is a special case with specific syntax.
+    if (tryMergeTokens({tok::exclaim, tok::identifier},
+                       TT_TableGenBangOperator)) {
+      Tokens.back()->Tok.setKind(tok::identifier);
+      Tokens.back()->Tok.setIdentifierInfo(nullptr);
+      if (Tokens.back()->TokenText == "!cond")
+        Tokens.back()->setFinalizedType(TT_TableGenCondOperator);
+      else
+        Tokens.back()->setFinalizedType(TT_TableGenBangOperator);
+      return;
+    }
+    if (tryMergeTokens({tok::exclaim, tok::kw_if}, TT_TableGenBangOperator)) {
+      // Here, "! if" becomes "!if".  That is, ! captures if even when the space
+      // exists. That is only one possibility in TableGen's syntax.
+      Tokens.back()->Tok.setKind(tok::identifier);
+      Tokens.back()->Tok.setIdentifierInfo(nullptr);
+      Tokens.back()->setFinalizedType(TT_TableGenBangOperator);
+      return;
+    }
+    // +, - with numbers are literals. Not unary operators.
+    if (tryMergeTokens({tok::plus, tok::numeric_constant}, TT_Unknown)) {
+      Tokens.back()->Tok.setKind(tok::numeric_constant);
+      return;
+    }
+    if (tryMergeTokens({tok::minus, tok::numeric_constant}, TT_Unknown)) {
+      Tokens.back()->Tok.setKind(tok::numeric_constant);
+      return;
+    }
   }
 }
 
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 3dbf504c35ed55..cb93930e0fc3bc 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -2210,16 +2210,24 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {
   EXPECT_TRUE(Tokens[0]->IsMultiline);
   EXPECT_EQ(Tokens[0]->LastLineColumnWidth, sizeof("   the string. }]") - 1);
 
+  // Numeric literals.
+  Tokens = Annotate("1234");
+  EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
+  Tokens = Annotate("-1");
+  EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
+  Tokens = Annotate("+1234");
+  EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
+  Tokens = Annotate("0b0110");
+  EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
+  Tokens = Annotate("0x1abC");
+  EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
+
   // Identifier tokens. In TableGen, identifiers can begin with a number.
   // In ambiguous cases, the lexer tries to lex it as a number.
   // Even if the try fails, it does not fall back to identifier lexing and
   // regard as an error.
   // The ambiguity is not documented. The result of those tests are based on the
   // implementation of llvm::TGLexer::LexToken.
-  Tokens = Annotate("1234");
-  EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
-  Tokens = Annotate("0x1abC");
-  EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
   // This is invalid syntax of number, but not an identifier.
   Tokens = Annotate("0x1234x");
   EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown);
@@ -2244,6 +2252,14 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {
   EXPECT_TOKEN(Tokens[6], tok::l_brace, TT_ElseLBrace);
   Tokens = Annotate("defset Foo Def2 = {}");
   EXPECT_TOKEN(Tokens[4], tok::l_brace, TT_FunctionLBrace);
+
+  // Bang Operators.
+  Tokens = Annotate("!foreach");
+  EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenBangOperator);
+  Tokens = Annotate("!if");
+  EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenBangOperator);
+  Tokens = Annotate("!cond");
+  EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenCondOperator);
 }
 
 TEST_F(TokenAnnotatorTest, UnderstandConstructors) {



More information about the cfe-commits mailing list