[clang] [clang-format] TableGen keywords support. (PR #77477)

Hirofumi Nakamura via cfe-commits cfe-commits at lists.llvm.org
Thu Jan 11 03:37:41 PST 2024


https://github.com/hnakamura5 updated https://github.com/llvm/llvm-project/pull/77477

>From 4e9f2bc86c4e48c4d412fea7804c226f041d022c Mon Sep 17 00:00:00 2001
From: hnakamura5 <hnakamura5 at outlook.com>
Date: Tue, 9 Jan 2024 22:57:53 +0900
Subject: [PATCH] [clang-format] TableGen keywords support.

Add TableGen keywords to the additional keyword list of the formatter.
---
 clang/include/clang/Format/Format.h           |  1 +
 clang/lib/Format/FormatToken.h                | 75 +++++++++++++++++++
 clang/lib/Format/FormatTokenLexer.cpp         |  3 +
 clang/lib/Format/TokenAnnotator.cpp           |  6 ++
 clang/unittests/Format/TokenAnnotatorTest.cpp | 18 +++++
 5 files changed, 103 insertions(+)

diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index 59b645ecab715b..5ffd63ee73fc36 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -3055,6 +3055,7 @@ struct FormatStyle {
   bool isProto() const {
     return Language == LK_Proto || Language == LK_TextProto;
   }
+  bool isTableGen() const { return Language == LK_TableGen; }
 
   /// Language, this format style is targeted at.
   /// \version 3.5
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index 3f9664f8f78a3e..bd21a972441a98 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -1202,6 +1202,21 @@ struct AdditionalKeywords {
     kw_verilogHashHash = &IdentTable.get("##");
     kw_apostrophe = &IdentTable.get("\'");
 
+    // TableGen keywords
+    kw_bit = &IdentTable.get("bit");
+    kw_bits = &IdentTable.get("bits");
+    kw_code = &IdentTable.get("code");
+    kw_dag = &IdentTable.get("dag");
+    kw_def = &IdentTable.get("def");
+    kw_defm = &IdentTable.get("defm");
+    kw_defset = &IdentTable.get("defset");
+    kw_defvar = &IdentTable.get("defvar");
+    kw_dump = &IdentTable.get("dump");
+    kw_include = &IdentTable.get("include");
+    kw_list = &IdentTable.get("list");
+    kw_multiclass = &IdentTable.get("multiclass");
+    kw_then = &IdentTable.get("then");
+
     // Keep this at the end of the constructor to make sure everything here
     // is
     // already initialized.
@@ -1294,6 +1309,27 @@ struct AdditionalKeywords {
          kw_wildcard,     kw_wire,
          kw_with,         kw_wor,
          kw_verilogHash,  kw_verilogHashHash});
+
+    TableGenExtraKeywords = std::unordered_set<IdentifierInfo *>({
+        kw_assert,
+        kw_bit,
+        kw_bits,
+        kw_code,
+        kw_dag,
+        kw_def,
+        kw_defm,
+        kw_defset,
+        kw_defvar,
+        kw_dump,
+        kw_foreach,
+        kw_in,
+        kw_include,
+        kw_let,
+        kw_list,
+        kw_multiclass,
+        kw_string,
+        kw_then,
+    });
   }
 
   // Context sensitive keywords.
@@ -1539,6 +1575,21 @@ struct AdditionalKeywords {
   // Symbols in Verilog that don't exist in C++.
   IdentifierInfo *kw_apostrophe;
 
+  // TableGen keywords
+  IdentifierInfo *kw_bit;
+  IdentifierInfo *kw_bits;
+  IdentifierInfo *kw_code;
+  IdentifierInfo *kw_dag;
+  IdentifierInfo *kw_def;
+  IdentifierInfo *kw_defm;
+  IdentifierInfo *kw_defset;
+  IdentifierInfo *kw_defvar;
+  IdentifierInfo *kw_dump;
+  IdentifierInfo *kw_include;
+  IdentifierInfo *kw_list;
+  IdentifierInfo *kw_multiclass;
+  IdentifierInfo *kw_then;
+
   /// Returns \c true if \p Tok is a keyword or an identifier.
   bool isWordLike(const FormatToken &Tok) const {
     // getIdentifierinfo returns non-null for keywords as well as identifiers.
@@ -1811,6 +1862,27 @@ struct AdditionalKeywords {
     }
   }
 
+  bool isTableGenDefinition(const FormatToken &Tok) const {
+    return Tok.isOneOf(kw_def, kw_defm, kw_defset, kw_defvar, kw_multiclass,
+                       kw_let, tok::kw_class);
+  }
+
+  bool isTableGenKeyword(const FormatToken &Tok) const {
+    switch (Tok.Tok.getKind()) {
+    case tok::kw_class:
+    case tok::kw_else:
+    case tok::kw_false:
+    case tok::kw_if:
+    case tok::kw_int:
+    case tok::kw_true:
+      return true;
+    default:
+      return Tok.is(tok::identifier) &&
+             TableGenExtraKeywords.find(Tok.Tok.getIdentifierInfo()) !=
+                 TableGenExtraKeywords.end();
+    }
+  }
+
 private:
   /// The JavaScript keywords beyond the C++ keyword set.
   std::unordered_set<IdentifierInfo *> JsExtraKeywords;
@@ -1820,6 +1892,9 @@ struct AdditionalKeywords {
 
   /// The Verilog keywords beyond the C++ keyword set.
   std::unordered_set<IdentifierInfo *> VerilogExtraKeywords;
+
+  /// The TableGen keywords beyond the C++ keyword set.
+  std::unordered_set<IdentifierInfo *> TableGenExtraKeywords;
 };
 
 inline bool isLineComment(const FormatToken &FormatTok) {
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 61430282c6f88c..a1fd6dd6effe6c 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -1182,6 +1182,9 @@ FormatToken *FormatTokenLexer::getNextToken() {
                                   tok::kw_operator)) {
       FormatTok->Tok.setKind(tok::identifier);
       FormatTok->Tok.setIdentifierInfo(nullptr);
+    } else if (Style.isTableGen() && !Keywords.isTableGenKeyword(*FormatTok)) {
+      FormatTok->Tok.setKind(tok::identifier);
+      FormatTok->Tok.setIdentifierInfo(nullptr);
     }
   } else if (FormatTok->is(tok::greatergreater)) {
     FormatTok->Tok.setKind(tok::greater);
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 8b43438c72dfe1..26d8c4585562e0 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -2234,6 +2234,12 @@ class AnnotatingParser {
     if (PreviousNotConst->ClosesRequiresClause)
       return false;
 
+    if (Style.isTableGen()) {
+      // keywords such as let and def* defines names.
+      if (Keywords.isTableGenDefinition(*PreviousNotConst))
+        return true;
+    }
+
     bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
                        PreviousNotConst->Previous &&
                        PreviousNotConst->Previous->is(tok::hash);
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 494205a1f2d8ed..33a320f7bb06e6 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -2172,6 +2172,24 @@ TEST_F(TokenAnnotatorTest, UnderstandsVerilogOperators) {
   EXPECT_TOKEN(Tokens[4], tok::string_literal, TT_Unknown);
 }
 
+TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {
+  auto Style = getLLVMStyle(FormatStyle::LK_TableGen);
+  ASSERT_TRUE(Style.isTableGen());
+
+  TestLexer Lexer(Allocator, Buffers, Style);
+  AdditionalKeywords Keywords(Lexer.IdentTable);
+  auto Annotate = [&Lexer, &Style](llvm::StringRef Code) {
+    return Lexer.annotate(Code);
+  };
+
+  // Additional keywords representation test.
+  auto Tokens = Annotate("def foo : Bar<1>;");
+  ASSERT_TRUE(Keywords.isTableGenKeyword(*Tokens[0]));
+  ASSERT_TRUE(Keywords.isTableGenDefinition(*Tokens[0]));
+  ASSERT_TRUE(Tokens[0]->is(Keywords.kw_def));
+  ASSERT_TRUE(Tokens[1]->is(TT_StartOfName));
+}
+
 TEST_F(TokenAnnotatorTest, UnderstandConstructors) {
   auto Tokens = annotate("Class::Class() : BaseClass(), Member() {}");
 



More information about the cfe-commits mailing list