[clang] [clang-format] Add Options to break inside the TableGen DAGArg. (PR #83149)

Hirofumi Nakamura via cfe-commits cfe-commits at lists.llvm.org
Tue Feb 27 08:16:57 PST 2024


https://github.com/hnakamura5 created https://github.com/llvm/llvm-project/pull/83149

This adds two options to control the line break inside TableGen DAGArg.
- TableGenBreakInsideDAGArgList 
- TableGenBreakingDAGArgOperators


>From becb28f6daa1fed9cabe40375a7ed863207b6bd2 Mon Sep 17 00:00:00 2001
From: hnakamura5 <k.nakamura.hirofumi at gmail.com>
Date: Wed, 28 Feb 2024 01:10:12 +0900
Subject: [PATCH] [clang-format] Add Options to break inside the TableGen
 DAGArg.

---
 clang/docs/ClangFormatStyleOptions.rst        | 42 ++++++++++++++++
 clang/include/clang/Format/Format.h           | 46 +++++++++++++++--
 clang/lib/Format/ContinuationIndenter.cpp     |  3 +-
 clang/lib/Format/Format.cpp                   |  6 +++
 clang/lib/Format/FormatToken.h                |  2 +
 clang/lib/Format/TokenAnnotator.cpp           | 49 ++++++++++++++++++-
 clang/unittests/Format/FormatTestTableGen.cpp | 42 ++++++++++++++++
 clang/unittests/Format/TokenAnnotatorTest.cpp | 41 ++++++++++++++++
 8 files changed, 226 insertions(+), 5 deletions(-)

diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst
index df399a229d8d4f..9b055d16b24ac9 100644
--- a/clang/docs/ClangFormatStyleOptions.rst
+++ b/clang/docs/ClangFormatStyleOptions.rst
@@ -6158,6 +6158,48 @@ the configuration (without a prefix: ``Auto``).
 **TabWidth** (``Unsigned``) :versionbadge:`clang-format 3.7` :ref:`¶ <TabWidth>`
   The number of columns used for tab stops.
 
+.. _TableGenBreakInsideDAGArgList:
+
+**TableGenBreakInsideDAGArgList** (``Boolean``) :versionbadge:`clang-format 19` :ref:`¶ <TableGenBreakInsideDAGArgList>`
+  Insert the line break for each element of DAGArg list in TableGen.
+
+
+  .. code-block:: c++
+
+    let DAGArgIns = (ins
+        i32:$src1,
+        i32:$src2
+    );
+
+.. _TableGenBreakingDAGArgOperators:
+
+**TableGenBreakingDAGArgOperators** (``List of Strings``) :versionbadge:`clang-format 19` :ref:`¶ <TableGenBreakingDAGArgOperators>`
+  Works only when TableGenBreakInsideDAGArgList is true.
+  The string list needs to consist of identifiers in TableGen.
+  If any identifier is specified, this limits the line breaks by
+  TableGenBreakInsideDAGArgList option only on DAGArg values beginning with
+  the specified identifiers.
+
+  For example the configuration,
+
+  .. code-block:: c++
+
+    TableGenBreakInsideDAGArgList: true
+    TableGenBreakingDAGArgOperators: ['ins', 'outs']
+
+  makes the line break only occurs inside DAGArgs beginning with the
+  specified identifiers 'ins' and 'outs'.
+
+
+  .. code-block:: c++
+
+    let DAGArgIns = (ins
+        i32:$src1,
+        i32:$src2
+    );
+    let DAGArgOtherID = (other i32:$other1, i32:$other2);
+    let DAGArgBang = (!cast<SomeType>("Some") i32:$src1, i32:$src2)
+
 .. _TypeNames:
 
 **TypeNames** (``List of Strings``) :versionbadge:`clang-format 17` :ref:`¶ <TypeNames>`
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index 613f1fd168465d..9729634183110c 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -4728,6 +4728,43 @@ struct FormatStyle {
   /// \version 8
   std::vector<std::string> StatementMacros;
 
+  /// Works only when TableGenBreakInsideDAGArgList is true.
+  /// The string list needs to consist of identifiers in TableGen.
+  /// If any identifier is specified, this limits the line breaks by
+  /// TableGenBreakInsideDAGArgList option only on DAGArg values beginning with
+  /// the specified identifiers.
+  ///
+  /// For example the configuration,
+  /// \code
+  ///   TableGenBreakInsideDAGArgList: true
+  ///   TableGenBreakingDAGArgOperators: ['ins', 'outs']
+  /// \endcode
+  ///
+  /// makes the line break only occurs inside DAGArgs beginning with the
+  /// specified identifiers 'ins' and 'outs'.
+  ///
+  /// \code
+  ///   let DAGArgIns = (ins
+  ///       i32:$src1,
+  ///       i32:$src2
+  ///   );
+  ///   let DAGArgOtherID = (other i32:$other1, i32:$other2);
+  ///   let DAGArgBang = (!cast<SomeType>("Some") i32:$src1, i32:$src2)
+  /// \endcode
+  /// \version 19
+  std::vector<std::string> TableGenBreakingDAGArgOperators;
+
+  /// Insert the line break for each element of DAGArg list in TableGen.
+  ///
+  /// \code
+  ///   let DAGArgIns = (ins
+  ///       i32:$src1,
+  ///       i32:$src2
+  ///   );
+  /// \endcode
+  /// \version 19
+  bool TableGenBreakInsideDAGArgList;
+
   /// The number of columns used for tab stops.
   /// \version 3.7
   unsigned TabWidth;
@@ -4980,9 +5017,12 @@ struct FormatStyle {
            SpacesInSquareBrackets == R.SpacesInSquareBrackets &&
            Standard == R.Standard &&
            StatementAttributeLikeMacros == R.StatementAttributeLikeMacros &&
-           StatementMacros == R.StatementMacros && TabWidth == R.TabWidth &&
-           TypeNames == R.TypeNames && TypenameMacros == R.TypenameMacros &&
-           UseTab == R.UseTab &&
+           StatementMacros == R.StatementMacros &&
+           TableGenBreakingDAGArgOperators ==
+               R.TableGenBreakingDAGArgOperators &&
+           TableGenBreakInsideDAGArgList == R.TableGenBreakInsideDAGArgList &&
+           TabWidth == R.TabWidth && TypeNames == R.TypeNames &&
+           TypenameMacros == R.TypenameMacros && UseTab == R.UseTab &&
            VerilogBreakBetweenInstancePorts ==
                R.VerilogBreakBetweenInstancePorts &&
            WhitespaceSensitiveMacros == R.WhitespaceSensitiveMacros;
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index df44e6994c4784..627ff7980753c2 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -1705,7 +1705,8 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State,
         (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign ||
          PrecedenceLevel != prec::Comma || Current.NestingLevel == 0) &&
         (!Style.isTableGen() ||
-         (Previous && Previous->is(TT_TableGenDAGArgListComma)))) {
+         (Previous && Previous->isOneOf(TT_TableGenDAGArgListComma,
+                                        TT_TableGenDAGArgListCommaToBreak)))) {
       NewParenState.Indent = std::max(
           std::max(State.Column, NewParenState.Indent), CurrentState.LastSpace);
     }
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index e64ba7eebc1ce8..50ae4563340c3e 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -1120,6 +1120,10 @@ template <> struct MappingTraits<FormatStyle> {
     IO.mapOptional("StatementAttributeLikeMacros",
                    Style.StatementAttributeLikeMacros);
     IO.mapOptional("StatementMacros", Style.StatementMacros);
+    IO.mapOptional("TableGenBreakingDAGArgOperators",
+                   Style.TableGenBreakingDAGArgOperators);
+    IO.mapOptional("TableGenBreakInsideDAGArgList",
+                   Style.TableGenBreakInsideDAGArgList);
     IO.mapOptional("TabWidth", Style.TabWidth);
     IO.mapOptional("TypeNames", Style.TypeNames);
     IO.mapOptional("TypenameMacros", Style.TypenameMacros);
@@ -1580,6 +1584,8 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {
   LLVMStyle.StatementAttributeLikeMacros.push_back("Q_EMIT");
   LLVMStyle.StatementMacros.push_back("Q_UNUSED");
   LLVMStyle.StatementMacros.push_back("QT_REQUIRE_VERSION");
+  LLVMStyle.TableGenBreakingDAGArgOperators = {};
+  LLVMStyle.TableGenBreakInsideDAGArgList = false;
   LLVMStyle.TabWidth = 8;
   LLVMStyle.UseTab = FormatStyle::UT_Never;
   LLVMStyle.VerilogBreakBetweenInstancePorts = true;
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index 0c1dce7a294082..a291a7c50d3a70 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -155,7 +155,9 @@ namespace format {
   TYPE(TableGenDAGArgCloser)                                                   \
   TYPE(TableGenDAGArgListColon)                                                \
   TYPE(TableGenDAGArgListComma)                                                \
+  TYPE(TableGenDAGArgListCommaToBreak)                                         \
   TYPE(TableGenDAGArgOpener)                                                   \
+  TYPE(TableGenDAGArgOperatorIDToBreak)                                        \
   TYPE(TableGenListCloser)                                                     \
   TYPE(TableGenListOpener)                                                     \
   TYPE(TableGenMultiLineString)                                                \
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index a60d6ae197a24e..605ee95cffb21c 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -990,16 +990,47 @@ class AnnotatingParser {
     return false;
   }
 
+  // Judge if the token is a operator ID to insert line break in DAGArg.
+  // That is, TableGenBreakingDAGArgOperators is empty (by the definition of the
+  // option) or the token is in the list.
+  bool isTableGenDAGArgBreakingOperator(const FormatToken &Tok) {
+    auto &Opes = Style.TableGenBreakingDAGArgOperators;
+    // If the list is empty, all operators are breaking operators.
+    if (Opes.empty())
+      return true;
+    // Otherwise, the operator is limited to normal identifiers.
+    if (Tok.isNot(tok::identifier) ||
+        Tok.isOneOf(TT_TableGenBangOperator, TT_TableGenCondOperator)) {
+      return false;
+    }
+    // The case next is colon, it is not a operator of identifier.
+    if (!Tok.Next || Tok.Next->is(tok::colon))
+      return false;
+    return std::find(Opes.begin(), Opes.end(), Tok.TokenText.str()) !=
+           Opes.end();
+  }
+
   // SimpleValue6 ::=  "(" DagArg [DagArgList] ")"
   // This parses SimpleValue 6's inside part of "(" ")"
   bool parseTableGenDAGArgAndList(FormatToken *Opener) {
+    FormatToken *FirstTok = CurrentToken;
     if (!parseTableGenDAGArg())
       return false;
+    bool BreakInside = false;
+    // Specialized detection for DAGArgOperatorID, a single identifier DAGArg
+    // operator that leads the line break for this DAGArg elements.
+    if (Style.TableGenBreakInsideDAGArgList &&
+        isTableGenDAGArgBreakingOperator(*FirstTok)) {
+      // Special case for identifier DAGArg operator.
+      BreakInside = true;
+      FirstTok->setType(TT_TableGenDAGArgOperatorIDToBreak);
+    }
     // Parse the [DagArgList] part
     bool FirstDAGArgListElm = true;
     while (CurrentToken) {
       if (!FirstDAGArgListElm && CurrentToken->is(tok::comma)) {
-        CurrentToken->setType(TT_TableGenDAGArgListComma);
+        CurrentToken->setType(BreakInside ? TT_TableGenDAGArgListCommaToBreak
+                                          : TT_TableGenDAGArgListComma);
         skipToNextNonComment();
       }
       if (CurrentToken && CurrentToken->is(tok::r_paren)) {
@@ -5085,6 +5116,10 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
     }
     if (Right.is(TT_TableGenCondOperatorColon))
       return false;
+    if (Left.is(TT_TableGenDAGArgOperatorIDToBreak) &&
+        Right.isNot(TT_TableGenDAGArgCloser)) {
+      return true;
+    }
     // Do not insert bang operators and consequent openers.
     if (Right.isOneOf(tok::l_paren, tok::less) &&
         Left.isOneOf(TT_TableGenBangOperator, TT_TableGenCondOperator)) {
@@ -5461,6 +5496,18 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
     //       case2:0);
     if (Left.is(TT_TableGenCondOperatorComma))
       return true;
+    if (Left.is(TT_TableGenDAGArgOperatorIDToBreak) &&
+        Right.isNot(TT_TableGenDAGArgCloser)) {
+      return true;
+    }
+    if (Left.is(TT_TableGenDAGArgListCommaToBreak))
+      return true;
+    if (Right.is(TT_TableGenDAGArgCloser) && Right.MatchingParen &&
+        Right.MatchingParen->Next->is(TT_TableGenDAGArgOperatorIDToBreak) &&
+        &Left != Right.MatchingParen->Next) {
+      // Check to avoid empty DAGArg such as (ins).
+      return Style.TableGenBreakInsideDAGArgList;
+    }
   }
 
   if (Line.startsWith(tok::kw_asm) && Right.is(TT_InlineASMColon) &&
diff --git a/clang/unittests/Format/FormatTestTableGen.cpp b/clang/unittests/Format/FormatTestTableGen.cpp
index 76b871e2e1a522..dd615ec1046576 100644
--- a/clang/unittests/Format/FormatTestTableGen.cpp
+++ b/clang/unittests/Format/FormatTestTableGen.cpp
@@ -332,6 +332,48 @@ TEST_F(FormatTestTableGen, Assert) {
   verifyFormat("assert !le(DefVar1, 0), \"Assert1\";\n");
 }
 
+TEST_F(FormatTestTableGen, DAGArgBreakInside) {
+  FormatStyle Style = getGoogleStyle(FormatStyle::LK_TableGen);
+  Style.ColumnLimit = 60;
+  // By default, the DAGArg does not have a break inside.
+  verifyFormat("def Def : Parent {\n"
+               "  let dagarg = (ins a:$src1, aa:$src2, aaa:$src3)\n"
+               "}\n",
+               Style);
+  // This option forces to break inside the DAGArg.
+  Style.TableGenBreakInsideDAGArgList = true;
+  verifyFormat("def Def : Parent {\n"
+               "  let dagarg = (ins\n"
+               "      a:$src1,\n"
+               "      aa:$src2,\n"
+               "      aaa:$src3\n"
+               "  )\n"
+               "}\n",
+               Style);
+  verifyFormat("def Def : Parent {\n"
+               "  let dagarg = (other\n"
+               "      a:$src1,\n"
+               "      aa:$src2,\n"
+               "      aaa:$src3\n"
+               "  )\n"
+               "}\n",
+               Style);
+  // Then, limit the DAGArg operator only to "ins".
+  Style.TableGenBreakingDAGArgOperators = {"ins"};
+  verifyFormat("def Def : Parent {\n"
+               "  let dagarg = (ins\n"
+               "      a:$src1,\n"
+               "      aa:$src2,\n"
+               "      aaa:$src3\n"
+               "  )\n"
+               "}\n",
+               Style);
+  verifyFormat("def Def : Parent {\n"
+               "  let dagarg = (other a:$src1, aa:$src2, aaa:$src3)\n"
+               "}\n",
+               Style);
+}
+
 TEST_F(FormatTestTableGen, CondOperatorAlignment) {
   FormatStyle Style = getGoogleStyle(FormatStyle::LK_TableGen);
   Style.ColumnLimit = 60;
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index c736dac8dabf21..be4fad753daf75 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -2332,6 +2332,47 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {
   EXPECT_TOKEN(Tokens[4], tok::less, TT_TemplateOpener);
   EXPECT_TOKEN(Tokens[6], tok::greater, TT_TemplateCloser);
   EXPECT_TOKEN(Tokens[7], tok::l_brace, TT_FunctionLBrace);
+
+  // DAGArg breaking options. They use different token types depending on what
+  // is specified.
+  Style.TableGenBreakInsideDAGArgList = true;
+
+  // Using only TableGenBreakInsideDAGArgList makes all the DAGArg to have line
+  // break.
+  Tokens = AnnotateValue("(ins type1:$src1, type2:$src2)");
+  ASSERT_EQ(Tokens.size(), 10u) << Tokens;
+  EXPECT_TOKEN(Tokens[0], tok::l_paren, TT_TableGenDAGArgOpener);
+  EXPECT_TOKEN(Tokens[1], tok::identifier,
+               TT_TableGenDAGArgOperatorIDToBreak); // ins
+  EXPECT_TOKEN(Tokens[5], tok::comma, TT_TableGenDAGArgListCommaToBreak);
+  EXPECT_TOKEN(Tokens[9], tok::r_paren, TT_TableGenDAGArgCloser);
+
+  Tokens = AnnotateValue("(other type1:$src1, type2:$src2)");
+  ASSERT_EQ(Tokens.size(), 10u) << Tokens;
+  EXPECT_TOKEN(Tokens[0], tok::l_paren, TT_TableGenDAGArgOpener);
+  EXPECT_TOKEN(Tokens[1], tok::identifier,
+               TT_TableGenDAGArgOperatorIDToBreak); // other
+  EXPECT_TOKEN(Tokens[5], tok::comma, TT_TableGenDAGArgListCommaToBreak);
+  EXPECT_TOKEN(Tokens[9], tok::r_paren, TT_TableGenDAGArgCloser);
+
+  // If TableGenBreakingDAGArgOperators is specified, it is limited to the
+  // specified operators.
+  Style.TableGenBreakingDAGArgOperators = {"ins", "outs"};
+  Tokens = AnnotateValue("(ins type1:$src1, type2:$src2)");
+  ASSERT_EQ(Tokens.size(), 10u) << Tokens;
+  EXPECT_TOKEN(Tokens[0], tok::l_paren, TT_TableGenDAGArgOpener);
+  EXPECT_TOKEN(Tokens[1], tok::identifier,
+               TT_TableGenDAGArgOperatorIDToBreak); // ins
+  EXPECT_TOKEN(Tokens[5], tok::comma, TT_TableGenDAGArgListCommaToBreak);
+  EXPECT_TOKEN(Tokens[9], tok::r_paren, TT_TableGenDAGArgCloser);
+
+  Tokens = AnnotateValue("(other type1:$src1, type2:$src2)");
+  ASSERT_EQ(Tokens.size(), 10u) << Tokens;
+  EXPECT_TOKEN(Tokens[0], tok::l_paren, TT_TableGenDAGArgOpener);
+  EXPECT_TOKEN(Tokens[1], tok::identifier,
+               TT_Unknown); // other
+  EXPECT_TOKEN(Tokens[5], tok::comma, TT_TableGenDAGArgListComma);
+  EXPECT_TOKEN(Tokens[9], tok::r_paren, TT_TableGenDAGArgCloser);
 }
 
 TEST_F(TokenAnnotatorTest, UnderstandConstructors) {



More information about the cfe-commits mailing list