[clang] [clang-format] Support of TableGen value annotations. (PR #80299)

Hirofumi Nakamura via cfe-commits cfe-commits at lists.llvm.org
Sat Feb 10 06:39:22 PST 2024


https://github.com/hnakamura5 updated https://github.com/llvm/llvm-project/pull/80299

>From 36f83a124ea8ad27cfefa1d12ae5aa781f8e6e3e Mon Sep 17 00:00:00 2001
From: hnakamura5 <k.nakamura.hirofumi at gmail.com>
Date: Thu, 1 Feb 2024 23:07:42 +0900
Subject: [PATCH 1/3] [clang-format] Support of TableGen value annotations.

---
 clang/lib/Format/FormatToken.h                |  10 +
 clang/lib/Format/FormatTokenLexer.cpp         |   2 +-
 clang/lib/Format/TokenAnnotator.cpp           | 303 +++++++++++++++++-
 clang/lib/Format/UnwrappedLineParser.cpp      |  13 +-
 clang/unittests/Format/TokenAnnotatorTest.cpp |  45 +++
 5 files changed, 364 insertions(+), 9 deletions(-)

diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index bace91b5f99b4d..0c1dce7a294082 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -150,7 +150,17 @@ namespace format {
   TYPE(StructuredBindingLSquare)                                               \
   TYPE(TableGenBangOperator)                                                   \
   TYPE(TableGenCondOperator)                                                   \
+  TYPE(TableGenCondOperatorColon)                                              \
+  TYPE(TableGenCondOperatorComma)                                              \
+  TYPE(TableGenDAGArgCloser)                                                   \
+  TYPE(TableGenDAGArgListColon)                                                \
+  TYPE(TableGenDAGArgListComma)                                                \
+  TYPE(TableGenDAGArgOpener)                                                   \
+  TYPE(TableGenListCloser)                                                     \
+  TYPE(TableGenListOpener)                                                     \
   TYPE(TableGenMultiLineString)                                                \
+  TYPE(TableGenTrailingPasteOperator)                                          \
+  TYPE(TableGenValueSuffix)                                                    \
   TYPE(TemplateCloser)                                                         \
   TYPE(TemplateOpener)                                                         \
   TYPE(TemplateString)                                                         \
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index d7de09ef0e12ab..27b2b1b619b1d3 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -816,7 +816,7 @@ void FormatTokenLexer::handleTableGenMultilineString() {
   auto CloseOffset = Lex->getBuffer().find("}]", OpenOffset);
   if (CloseOffset == StringRef::npos)
     return;
-  auto Text = Lex->getBuffer().substr(OpenOffset, CloseOffset + 2);
+  auto Text = Lex->getBuffer().substr(OpenOffset, CloseOffset - OpenOffset + 2);
   MultiLineString->TokenText = Text;
   resetLexer(SourceMgr.getFileOffset(
       Lex->getSourceLocation(Lex->getBufferLocation() - 2 + Text.size())));
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index df1c5bc19de1e8..afcf5f638ce4c8 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -256,6 +256,18 @@ class AnnotatingParser {
           }
         }
       }
+      if (Style.isTableGen()) {
+        if (CurrentToken->isOneOf(tok::comma, tok::equal)) {
+          // They appears as a separator. Unless it is not in class definition.
+          next();
+          continue;
+        }
+        // In angle, there must be Value like tokens. Types are also able to be
+        // parsed in the same way with Values.
+        if (!parseTableGenValue())
+          return false;
+        continue;
+      }
       if (!consumeToken())
         return false;
     }
@@ -388,6 +400,28 @@ class AnnotatingParser {
       Contexts.back().IsExpression = !IsForOrCatch;
     }
 
+    if (Style.isTableGen()) {
+      if (FormatToken *Prev = OpeningParen.Previous) {
+        if (Prev->is(TT_TableGenCondOperator)) {
+          Contexts.back().IsTableGenCondOpe = true;
+          Contexts.back().IsExpression = true;
+        } else if (Contexts.size() > 1 &&
+                   Contexts[Contexts.size() - 2].IsTableGenBangOpe) {
+          // Hack to handle bang operators. The parent context's flag
+          // was set by parseTableGenSimpleValue().
+          // We have to specify the context outside because the prev of "(" may
+          // be ">", not the bang operator in this case.
+          Contexts.back().IsTableGenBangOpe = true;
+          Contexts.back().IsExpression = true;
+        } else {
+          // Otherwise, this paren seems DAGArg.
+          if (!parseTableGenDAGArg())
+            return false;
+          return parseTableGenDAGArgAndList(&OpeningParen);
+        }
+      }
+    }
+
     // Infer the role of the l_paren based on the previous token if we haven't
     // detected one yet.
     if (PrevNonComment && OpeningParen.is(TT_Unknown)) {
@@ -549,6 +583,22 @@ class AnnotatingParser {
       if (CurrentToken->is(tok::comma))
         Contexts.back().CanBeExpression = true;
 
+      if (Style.isTableGen()) {
+        if (CurrentToken->is(tok::comma)) {
+          if (Contexts.back().IsTableGenCondOpe)
+            CurrentToken->setType(TT_TableGenCondOperatorComma);
+          next();
+        } else if (CurrentToken->is(tok::colon)) {
+          if (Contexts.back().IsTableGenCondOpe)
+            CurrentToken->setType(TT_TableGenCondOperatorColon);
+          next();
+        }
+        // In TableGen there must be Values in parens.
+        if (!parseTableGenValue())
+          return false;
+        continue;
+      }
+
       FormatToken *Tok = CurrentToken;
       if (!consumeToken())
         return false;
@@ -803,6 +853,8 @@ class AnnotatingParser {
           if (Left->BlockParameterCount > 1)
             Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
         }
+        if (Style.isTableGen() && Left->is(TT_TableGenListOpener))
+          CurrentToken->setType(TT_TableGenListCloser);
         next();
         return true;
       }
@@ -833,6 +885,19 @@ class AnnotatingParser {
         Left->setType(TT_ArrayInitializerLSquare);
       }
       FormatToken *Tok = CurrentToken;
+      if (Style.isTableGen()) {
+        if (CurrentToken->isOneOf(tok::comma, tok::minus, tok::ellipsis)) {
+          // '-' and '...' appears as a separator in slice.
+          next();
+        } else {
+          // In TableGen there must be a list of Values in square brackets.
+          // It must be ValueList or SliceElements.
+          if (!parseTableGenValue())
+            return false;
+        }
+        updateParameterCount(Left, Tok);
+        continue;
+      }
       if (!consumeToken())
         return false;
       updateParameterCount(Left, Tok);
@@ -840,6 +905,187 @@ class AnnotatingParser {
     return false;
   }
 
+  void nextTableGenNonComment() {
+    next();
+    while (CurrentToken && CurrentToken->is(tok::comment))
+      next();
+  }
+
+  bool parseTableGenValue(bool ParseNameMode = false) {
+    if (!CurrentToken)
+      return false;
+    while (CurrentToken->is(tok::comment))
+      next();
+    if (!parseTableGenSimpleValue())
+      return false;
+    if (!CurrentToken)
+      return true;
+    // Value "#" [Value]
+    if (CurrentToken->is(tok::hash)) {
+      if (CurrentToken->Next &&
+          CurrentToken->Next->isOneOf(tok::colon, tok::semi, tok::l_brace)) {
+        // Trailing paste operator.
+        // These are only the allowed cases in TGParser::ParseValue().
+        CurrentToken->setType(TT_TableGenTrailingPasteOperator);
+        next();
+        return true;
+      }
+      FormatToken *HashTok = CurrentToken;
+      nextTableGenNonComment();
+      HashTok->setType(TT_Unknown);
+      if (!parseTableGenValue(ParseNameMode))
+        return false;
+    }
+    // In name mode, '{' is regarded as the end of the value.
+    // See TGParser::ParseValue in TGParser.cpp
+    if (ParseNameMode && CurrentToken->is(tok::l_brace))
+      return true;
+    if (CurrentToken->isOneOf(tok::l_brace, tok::l_square, tok::period)) {
+      // Delegate ValueSuffix to normal consumeToken
+      CurrentToken->setType(TT_TableGenValueSuffix);
+      FormatToken *Suffix = CurrentToken;
+      nextTableGenNonComment();
+      if (Suffix->is(tok::l_square)) {
+        return parseSquare();
+      } else if (Suffix->is(tok::l_brace)) {
+        Scopes.push_back(getScopeType(*Suffix));
+        return parseBrace();
+      }
+      return true;
+    }
+    return true;
+  }
+
+  // TokVarName    ::=  "$" ualpha (ualpha |  "0"..."9")*
+  bool tryToParseTableGenTokVar() {
+    if (!CurrentToken)
+      return false;
+    if (CurrentToken->is(tok::identifier) &&
+        CurrentToken->TokenText.front() == '$') {
+      nextTableGenNonComment();
+      return true;
+    }
+    return false;
+  }
+
+  // DagArg       ::=  Value [":" TokVarName] | TokVarName
+  bool parseTableGenDAGArg() {
+    if (tryToParseTableGenTokVar())
+      return true;
+    if (parseTableGenValue()) {
+      if (CurrentToken && CurrentToken->is(tok::colon)) {
+        CurrentToken->setType(TT_TableGenDAGArgListColon);
+        nextTableGenNonComment();
+        return tryToParseTableGenTokVar();
+      }
+      return true;
+    }
+    return false;
+  }
+
+  // SimpleValue6 ::=  "(" DagArg [DagArgList] ")"
+  // This parses SimpleValue 6's inside part of "(" ")"
+  bool parseTableGenDAGArgAndList(FormatToken *Opener) {
+    FormatToken *FirstTok = CurrentToken;
+    if (!parseTableGenDAGArg())
+      return false;
+    // Parse the [DagArgList] part
+    bool FirstDAGArgListElm = true;
+    while (CurrentToken) {
+      if (!FirstDAGArgListElm && CurrentToken->is(tok::comma)) {
+        CurrentToken->setType(TT_TableGenDAGArgListComma);
+        nextTableGenNonComment();
+      }
+      if (CurrentToken && CurrentToken->is(tok::r_paren)) {
+        CurrentToken->setType(TT_TableGenDAGArgCloser);
+        Opener->MatchingParen = CurrentToken;
+        CurrentToken->MatchingParen = Opener;
+        nextTableGenNonComment();
+        return true;
+      }
+      if (!parseTableGenDAGArg())
+        return false;
+      FirstDAGArgListElm = false;
+    }
+    return false;
+  }
+
+  bool parseTableGenSimpleValue() {
+    assert(Style.isTableGen());
+    if (!CurrentToken)
+      return false;
+    FormatToken *Tok = CurrentToken;
+    nextTableGenNonComment();
+    // SimpleValue 1, 2, 3: Literals
+    if (Tok->isOneOf(tok::numeric_constant, tok::string_literal,
+                     TT_TableGenMultiLineString, tok::kw_true, tok::kw_false,
+                     tok::question, tok::kw_int)) {
+      return true;
+    }
+    // SimpleValue 4: ValueList, Type
+    if (Tok->is(tok::l_brace)) {
+      Scopes.push_back(getScopeType(*Tok));
+      return parseBrace();
+    }
+    // SimpleValue 5: List initializer
+    if (Tok->is(tok::l_square)) {
+      Tok->setType(TT_TableGenListOpener);
+      if (!parseSquare())
+        return false;
+      if (Tok->is(tok::less)) {
+        CurrentToken->setType(TT_TemplateOpener);
+        return parseAngle();
+      }
+      return true;
+    }
+    // SimpleValue 6: DAGArg [DAGArgList]
+    // SimpleValue6 ::=  "(" DagArg [DagArgList] ")"
+    if (Tok->is(tok::l_paren)) {
+      Tok->setType(TT_TableGenDAGArgOpener);
+      return parseTableGenDAGArgAndList(Tok);
+    }
+    // SimpleValue 9: Bang operator
+    if (Tok->is(TT_TableGenBangOperator)) {
+      if (CurrentToken && CurrentToken->is(tok::less)) {
+        CurrentToken->setType(TT_TemplateOpener);
+        nextTableGenNonComment();
+        if (!parseAngle())
+          return false;
+      }
+      if (!CurrentToken || CurrentToken->isNot(tok::l_paren))
+        return false;
+      nextTableGenNonComment();
+      // FIXME: Hack using inheritance to child context
+      Contexts.back().IsTableGenBangOpe = true;
+      bool Result = parseParens();
+      Contexts.back().IsTableGenBangOpe = false;
+      return Result;
+    }
+    // SimpleValue 9: Cond operator
+    if (Tok->is(TT_TableGenCondOperator)) {
+      Tok = CurrentToken;
+      nextTableGenNonComment();
+      if (!Tok || Tok->isNot(tok::l_paren))
+        return false;
+      bool Result = parseParens();
+      return Result;
+    }
+    // We have to check identifier at the last because the kind of bang/cond
+    // operators are also identifier.
+    // SimpleValue 7: Identifiers
+    if (Tok->is(tok::identifier)) {
+      // SimpleValue 8: Anonymous record
+      if (CurrentToken && CurrentToken->is(tok::less)) {
+        CurrentToken->setType(TT_TemplateOpener);
+        nextTableGenNonComment();
+        return parseAngle();
+      }
+      return true;
+    }
+
+    return false;
+  }
+
   bool couldBeInStructArrayInitializer() const {
     if (Contexts.size() < 2)
       return false;
@@ -880,6 +1126,8 @@ class AnnotatingParser {
          OpeningBrace.getPreviousNonComment()->isNot(Keywords.kw_apostrophe))) {
       Contexts.back().VerilogMayBeConcatenation = true;
     }
+    if (Style.isTableGen())
+      Contexts.back().ColonIsDictLiteral = false;
 
     unsigned CommaCount = 0;
     while (CurrentToken) {
@@ -906,7 +1154,7 @@ class AnnotatingParser {
         FormatToken *Previous = CurrentToken->getPreviousNonComment();
         if (Previous->is(TT_JsTypeOptionalQuestion))
           Previous = Previous->getPreviousNonComment();
-        if ((CurrentToken->is(tok::colon) &&
+        if ((CurrentToken->is(tok::colon) && !Style.isTableGen() &&
              (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) ||
             Style.isProto()) {
           OpeningBrace.setType(TT_DictLiteral);
@@ -915,10 +1163,12 @@ class AnnotatingParser {
             Previous->setType(TT_SelectorName);
           }
         }
-        if (CurrentToken->is(tok::colon) && OpeningBrace.is(TT_Unknown))
+        if (CurrentToken->is(tok::colon) && OpeningBrace.is(TT_Unknown) &&
+            !Style.isTableGen()) {
           OpeningBrace.setType(TT_DictLiteral);
-        else if (Style.isJavaScript())
+        } else if (Style.isJavaScript()) {
           OpeningBrace.overwriteFixedType(TT_DictLiteral);
+        }
       }
       if (CurrentToken->is(tok::comma)) {
         if (Style.isJavaScript())
@@ -989,6 +1239,9 @@ class AnnotatingParser {
     // operators.
     if (Tok->is(TT_VerilogTableItem))
       return true;
+    // Multi-line string itself is a single annotated token.
+    if (Tok->is(TT_TableGenMultiLineString))
+      return true;
     switch (Tok->Tok.getKind()) {
     case tok::plus:
     case tok::minus:
@@ -1119,6 +1372,10 @@ class AnnotatingParser {
         Tok->setType(TT_ObjCMethodExpr);
       } else if (Contexts.back().ContextKind == tok::l_paren &&
                  !Line.InPragmaDirective) {
+        if (Style.isTableGen() && Contexts.back().IsTableGenDAGArg) {
+          Tok->setType(TT_TableGenDAGArgListColon);
+          break;
+        }
         Tok->setType(TT_InlineASMColon);
       }
       break;
@@ -1130,6 +1387,14 @@ class AnnotatingParser {
         Tok->setType(TT_JsTypeOperator);
       break;
     case tok::kw_if:
+      if (Style.isTableGen()) {
+        // In TableGen it has the form 'if' <value> 'then'.
+        if (!parseTableGenValue())
+          return false;
+        if (CurrentToken && CurrentToken->is(Keywords.kw_then))
+          next(); // skip then
+        break;
+      }
       if (CurrentToken &&
           CurrentToken->isOneOf(tok::kw_constexpr, tok::identifier)) {
         next();
@@ -1235,6 +1500,8 @@ class AnnotatingParser {
       }
       break;
     case tok::l_square:
+      if (Style.isTableGen())
+        Tok->setType(TT_TableGenListOpener);
       if (!parseSquare())
         return false;
       break;
@@ -1264,6 +1531,8 @@ class AnnotatingParser {
           if (Previous && Previous->getType() != TT_DictLiteral)
             Previous->setType(TT_SelectorName);
         }
+        if (Style.isTableGen())
+          Tok->setType(TT_TemplateOpener);
       } else {
         Tok->setType(TT_BinaryOperator);
         NonTemplateLess.insert(Tok);
@@ -1423,11 +1692,30 @@ class AnnotatingParser {
         if (!Tok->getPreviousNonComment())
           Line.IsContinuation = true;
       }
+      if (Style.isTableGen()) {
+        if (Tok->is(Keywords.kw_assert)) {
+          if (!parseTableGenValue())
+            return false;
+        } else if (Tok->isOneOf(Keywords.kw_def, Keywords.kw_defm) &&
+                   (!Tok->Next ||
+                    !Tok->Next->isOneOf(tok::colon, tok::l_brace))) {
+          // The case NameValue appears.
+          if (!parseTableGenValue(true))
+            return false;
+        }
+      }
       break;
     case tok::arrow:
       if (Tok->Previous && Tok->Previous->is(tok::kw_noexcept))
         Tok->setType(TT_TrailingReturnArrow);
       break;
+    case tok::equal:
+      // In TableGen, there must be a value after "=";
+      if (Style.isTableGen()) {
+        if (!parseTableGenValue())
+          return false;
+      }
+      break;
     default:
       break;
     }
@@ -1757,6 +2045,9 @@ class AnnotatingParser {
     // Whether the braces may mean concatenation instead of structure or array
     // literal.
     bool VerilogMayBeConcatenation = false;
+    bool IsTableGenDAGArg = false;
+    bool IsTableGenBangOpe = false;
+    bool IsTableGenCondOpe = false;
     enum {
       Unknown,
       // Like the part after `:` in a constructor.
@@ -2061,6 +2352,9 @@ class AnnotatingParser {
         // In JavaScript, `interface X { foo?(): bar; }` is an optional method
         // on the interface, not a ternary expression.
         Current.setType(TT_JsTypeOptionalQuestion);
+      } else if (Style.isTableGen()) {
+        // In TableGen, '?' is just an identifier like token.
+        Current.setType(TT_Unknown);
       } else {
         Current.setType(TT_ConditionalExpr);
       }
@@ -2239,6 +2533,9 @@ class AnnotatingParser {
       // keywords such as let and def* defines names.
       if (Keywords.isTableGenDefinition(*PreviousNotConst))
         return true;
+      // Otherwise C++ style declarations is available only inside the brace.
+      if (Contexts.back().ContextKind != tok::l_brace)
+        return false;
     }
 
     bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index b904e0e56d9eb3..371fc58b90ab7e 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -495,12 +495,15 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
     do {
       NextTok = Tokens->getNextToken();
     } while (NextTok->is(tok::comment));
-    while (NextTok->is(tok::hash) && !Line->InMacroBody) {
-      NextTok = Tokens->getNextToken();
-      do {
+    if (!Style.isTableGen()) {
+      // InTableGen, '#' is like binary operator. Not a preprocessor directive.
+      while (NextTok->is(tok::hash) && !Line->InMacroBody) {
         NextTok = Tokens->getNextToken();
-      } while (NextTok->is(tok::comment) ||
-               (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof)));
+        do {
+          NextTok = Tokens->getNextToken();
+        } while (NextTok->is(tok::comment) ||
+                 (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof)));
+      }
     }
 
     switch (Tok->Tok.getKind()) {
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index f3e443e8829bd4..374434f8482d31 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -2265,6 +2265,51 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {
   EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenBangOperator);
   Tokens = Annotate("!cond");
   EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenCondOperator);
+
+  auto AnnotateValue = [this, &Style](llvm::StringRef Code) {
+    // Values are annotated only in specific context.
+    auto Result = annotate(("def X { let V = " + Code + "; }").str(), Style);
+    return decltype(Result){Result.begin() + 6, Result.end() - 3};
+  };
+  // Both of bang/cond operators.
+  Tokens = AnnotateValue("!cond(!eq(x, 0): 1, true: x)");
+  ASSERT_EQ(Tokens.size(), 15u) << Tokens;
+  EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenCondOperator);
+  EXPECT_TOKEN(Tokens[2], tok::identifier, TT_TableGenBangOperator);
+  EXPECT_TOKEN(Tokens[8], tok::colon, TT_TableGenCondOperatorColon);
+  EXPECT_TOKEN(Tokens[10], tok::comma, TT_TableGenCondOperatorComma);
+  EXPECT_TOKEN(Tokens[12], tok::colon, TT_TableGenCondOperatorColon);
+  // DAGArg values with operator identifier
+  Tokens = AnnotateValue("(ins type1:$src1, type2:$src2)");
+  ASSERT_EQ(Tokens.size(), 10u) << Tokens;
+  EXPECT_TOKEN(Tokens[0], tok::l_paren, TT_TableGenDAGArgOpener);
+  EXPECT_TOKEN(Tokens[3], tok::colon, TT_TableGenDAGArgListColon);
+  EXPECT_TOKEN(Tokens[4], tok::identifier, TT_Unknown); // $src1
+  EXPECT_TOKEN(Tokens[5], tok::comma, TT_TableGenDAGArgListComma);
+  EXPECT_TOKEN(Tokens[7], tok::colon, TT_TableGenDAGArgListColon);
+  EXPECT_TOKEN(Tokens[9], tok::r_paren, TT_TableGenDAGArgCloser);
+  // List literal
+  Tokens = AnnotateValue("[1, 2, 3]");
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
+  EXPECT_TOKEN(Tokens[0], tok::l_square, TT_TableGenListOpener);
+  EXPECT_TOKEN(Tokens[6], tok::r_square, TT_TableGenListCloser);
+  // Suffixes of values
+  Tokens = AnnotateValue("valid.field");
+  ASSERT_EQ(Tokens.size(), 3u) << Tokens;
+  EXPECT_TOKEN(Tokens[1], tok::period, TT_TableGenValueSuffix);
+  // Code
+  Tokens = AnnotateValue("[{ code is multiline string }]");
+  ASSERT_EQ(Tokens.size(), 1u) << Tokens;
+  EXPECT_TOKEN(Tokens[0], tok::string_literal, TT_TableGenMultiLineString);
+
+  // The definition
+  Tokens = annotate("def Def : Parent<Child> {}", Style);
+  ASSERT_EQ(Tokens.size(), 10u) << Tokens; // This contains eof.
+  // We use inheritance colon and function brace. They are enough.
+  EXPECT_TOKEN(Tokens[2], tok::colon, TT_InheritanceColon);
+  EXPECT_TOKEN(Tokens[4], tok::less, TT_TemplateOpener);
+  EXPECT_TOKEN(Tokens[6], tok::greater, TT_TemplateCloser);
+  EXPECT_TOKEN(Tokens[7], tok::l_brace, TT_FunctionLBrace);
 }
 
 TEST_F(TokenAnnotatorTest, UnderstandConstructors) {

>From 9fceafe2e75d636c83f9f02b748c8400b7767678 Mon Sep 17 00:00:00 2001
From: hnakamura5 <k.nakamura.hirofumi at gmail.com>
Date: Tue, 6 Feb 2024 21:55:53 +0900
Subject: [PATCH 2/3] Fixed the reviewed points.

---
 clang/lib/Format/TokenAnnotator.cpp | 46 ++++++++++++++++-------------
 1 file changed, 25 insertions(+), 21 deletions(-)

diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index afcf5f638ce4c8..9c99e4af97941d 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -258,7 +258,7 @@ class AnnotatingParser {
       }
       if (Style.isTableGen()) {
         if (CurrentToken->isOneOf(tok::comma, tok::equal)) {
-          // They appears as a separator. Unless it is not in class definition.
+          // They appear as separators. Unless they are not in class definition.
           next();
           continue;
         }
@@ -905,12 +905,17 @@ class AnnotatingParser {
     return false;
   }
 
-  void nextTableGenNonComment() {
+  void skipToNextNonComment() {
     next();
     while (CurrentToken && CurrentToken->is(tok::comment))
       next();
   }
 
+  // Simplified parser for TableGen Value.
+  // It consists of SimpleValues, SimpleValues with Suffixes, and Value followed
+  // by '#', paste operator.
+  // There also exists the case the Value is parsed as NameValue.
+  // In this case, the Value ends if '{' is found.
   bool parseTableGenValue(bool ParseNameMode = false) {
     if (!CurrentToken)
       return false;
@@ -931,7 +936,7 @@ class AnnotatingParser {
         return true;
       }
       FormatToken *HashTok = CurrentToken;
-      nextTableGenNonComment();
+      skipToNextNonComment();
       HashTok->setType(TT_Unknown);
       if (!parseTableGenValue(ParseNameMode))
         return false;
@@ -940,42 +945,43 @@ class AnnotatingParser {
     // See TGParser::ParseValue in TGParser.cpp
     if (ParseNameMode && CurrentToken->is(tok::l_brace))
       return true;
+    // These tokens indicates this is a value with suffixes.
     if (CurrentToken->isOneOf(tok::l_brace, tok::l_square, tok::period)) {
-      // Delegate ValueSuffix to normal consumeToken
       CurrentToken->setType(TT_TableGenValueSuffix);
       FormatToken *Suffix = CurrentToken;
-      nextTableGenNonComment();
-      if (Suffix->is(tok::l_square)) {
+      skipToNextNonComment();
+      if (Suffix->is(tok::l_square))
         return parseSquare();
-      } else if (Suffix->is(tok::l_brace)) {
+      if (Suffix->is(tok::l_brace)) {
         Scopes.push_back(getScopeType(*Suffix));
         return parseBrace();
       }
-      return true;
     }
     return true;
   }
 
   // TokVarName    ::=  "$" ualpha (ualpha |  "0"..."9")*
+  // Appears as a part of DagArg.
   bool tryToParseTableGenTokVar() {
     if (!CurrentToken)
       return false;
     if (CurrentToken->is(tok::identifier) &&
         CurrentToken->TokenText.front() == '$') {
-      nextTableGenNonComment();
+      skipToNextNonComment();
       return true;
     }
     return false;
   }
 
   // DagArg       ::=  Value [":" TokVarName] | TokVarName
+  // Appears as a part of SimpleValue6.
   bool parseTableGenDAGArg() {
     if (tryToParseTableGenTokVar())
       return true;
     if (parseTableGenValue()) {
       if (CurrentToken && CurrentToken->is(tok::colon)) {
         CurrentToken->setType(TT_TableGenDAGArgListColon);
-        nextTableGenNonComment();
+        skipToNextNonComment();
         return tryToParseTableGenTokVar();
       }
       return true;
@@ -994,13 +1000,13 @@ class AnnotatingParser {
     while (CurrentToken) {
       if (!FirstDAGArgListElm && CurrentToken->is(tok::comma)) {
         CurrentToken->setType(TT_TableGenDAGArgListComma);
-        nextTableGenNonComment();
+        skipToNextNonComment();
       }
       if (CurrentToken && CurrentToken->is(tok::r_paren)) {
         CurrentToken->setType(TT_TableGenDAGArgCloser);
         Opener->MatchingParen = CurrentToken;
         CurrentToken->MatchingParen = Opener;
-        nextTableGenNonComment();
+        skipToNextNonComment();
         return true;
       }
       if (!parseTableGenDAGArg())
@@ -1015,7 +1021,7 @@ class AnnotatingParser {
     if (!CurrentToken)
       return false;
     FormatToken *Tok = CurrentToken;
-    nextTableGenNonComment();
+    skipToNextNonComment();
     // SimpleValue 1, 2, 3: Literals
     if (Tok->isOneOf(tok::numeric_constant, tok::string_literal,
                      TT_TableGenMultiLineString, tok::kw_true, tok::kw_false,
@@ -1048,13 +1054,13 @@ class AnnotatingParser {
     if (Tok->is(TT_TableGenBangOperator)) {
       if (CurrentToken && CurrentToken->is(tok::less)) {
         CurrentToken->setType(TT_TemplateOpener);
-        nextTableGenNonComment();
+        skipToNextNonComment();
         if (!parseAngle())
           return false;
       }
       if (!CurrentToken || CurrentToken->isNot(tok::l_paren))
         return false;
-      nextTableGenNonComment();
+      skipToNextNonComment();
       // FIXME: Hack using inheritance to child context
       Contexts.back().IsTableGenBangOpe = true;
       bool Result = parseParens();
@@ -1064,7 +1070,7 @@ class AnnotatingParser {
     // SimpleValue 9: Cond operator
     if (Tok->is(TT_TableGenCondOperator)) {
       Tok = CurrentToken;
-      nextTableGenNonComment();
+      skipToNextNonComment();
       if (!Tok || Tok->isNot(tok::l_paren))
         return false;
       bool Result = parseParens();
@@ -1077,7 +1083,7 @@ class AnnotatingParser {
       // SimpleValue 8: Anonymous record
       if (CurrentToken && CurrentToken->is(tok::less)) {
         CurrentToken->setType(TT_TemplateOpener);
-        nextTableGenNonComment();
+        skipToNextNonComment();
         return parseAngle();
       }
       return true;
@@ -1711,10 +1717,8 @@ class AnnotatingParser {
       break;
     case tok::equal:
       // In TableGen, there must be a value after "=";
-      if (Style.isTableGen()) {
-        if (!parseTableGenValue())
-          return false;
-      }
+      if (Style.isTableGen() && !parseTableGenValue())
+        return false;
       break;
     default:
       break;

>From 48364ff29bf911b0d272d4bf79cdc0dc3221306b Mon Sep 17 00:00:00 2001
From: hnakamura5 <k.nakamura.hirofumi at gmail.com>
Date: Sat, 10 Feb 2024 23:39:10 +0900
Subject: [PATCH 3/3] Added few comments.

---
 clang/lib/Format/TokenAnnotator.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 9c99e4af97941d..cb7fcfa6c093ab 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -911,7 +911,7 @@ class AnnotatingParser {
       next();
   }
 
-  // Simplified parser for TableGen Value.
+  // Simplified parser for TableGen Value. Returns true on success.
   // It consists of SimpleValues, SimpleValues with Suffixes, and Value followed
   // by '#', paste operator.
   // There also exists the case the Value is parsed as NameValue.
@@ -962,6 +962,7 @@ class AnnotatingParser {
 
   // TokVarName    ::=  "$" ualpha (ualpha |  "0"..."9")*
   // Appears as a part of DagArg.
+  // This does not change the current token on fail.
   bool tryToParseTableGenTokVar() {
     if (!CurrentToken)
       return false;



More information about the cfe-commits mailing list