[clang] [clang-format] Add Options to break inside the TableGen DAGArg. (PR #83149)

Hirofumi Nakamura via cfe-commits cfe-commits at lists.llvm.org
Sun Mar 10 20:58:28 PDT 2024


https://github.com/hnakamura5 updated https://github.com/llvm/llvm-project/pull/83149

>From becb28f6daa1fed9cabe40375a7ed863207b6bd2 Mon Sep 17 00:00:00 2001
From: hnakamura5 <k.nakamura.hirofumi at gmail.com>
Date: Wed, 28 Feb 2024 01:10:12 +0900
Subject: [PATCH 1/2] [clang-format] Add Options to break inside the TableGen
 DAGArg.

---
 clang/docs/ClangFormatStyleOptions.rst        | 42 ++++++++++++++++
 clang/include/clang/Format/Format.h           | 46 +++++++++++++++--
 clang/lib/Format/ContinuationIndenter.cpp     |  3 +-
 clang/lib/Format/Format.cpp                   |  6 +++
 clang/lib/Format/FormatToken.h                |  2 +
 clang/lib/Format/TokenAnnotator.cpp           | 49 ++++++++++++++++++-
 clang/unittests/Format/FormatTestTableGen.cpp | 42 ++++++++++++++++
 clang/unittests/Format/TokenAnnotatorTest.cpp | 41 ++++++++++++++++
 8 files changed, 226 insertions(+), 5 deletions(-)

diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst
index df399a229d8d4f..9b055d16b24ac9 100644
--- a/clang/docs/ClangFormatStyleOptions.rst
+++ b/clang/docs/ClangFormatStyleOptions.rst
@@ -6158,6 +6158,48 @@ the configuration (without a prefix: ``Auto``).
 **TabWidth** (``Unsigned``) :versionbadge:`clang-format 3.7` :ref:`¶ <TabWidth>`
   The number of columns used for tab stops.
 
+.. _TableGenBreakInsideDAGArgList:
+
+**TableGenBreakInsideDAGArgList** (``Boolean``) :versionbadge:`clang-format 19` :ref:`¶ <TableGenBreakInsideDAGArgList>`
+  Insert the line break for each element of DAGArg list in TableGen.
+
+
+  .. code-block:: c++
+
+    let DAGArgIns = (ins
+        i32:$src1,
+        i32:$src2
+    );
+
+.. _TableGenBreakingDAGArgOperators:
+
+**TableGenBreakingDAGArgOperators** (``List of Strings``) :versionbadge:`clang-format 19` :ref:`¶ <TableGenBreakingDAGArgOperators>`
+  Works only when TableGenBreakInsideDAGArgList is true.
+  The string list needs to consist of identifiers in TableGen.
+  If any identifier is specified, this limits the line breaks by
+  TableGenBreakInsideDAGArgList option only on DAGArg values beginning with
+  the specified identifiers.
+
+  For example the configuration,
+
+  .. code-block:: c++
+
+    TableGenBreakInsideDAGArgList: true
+    TableGenBreakingDAGArgOperators: ['ins', 'outs']
+
+  makes the line break only occurs inside DAGArgs beginning with the
+  specified identifiers 'ins' and 'outs'.
+
+
+  .. code-block:: c++
+
+    let DAGArgIns = (ins
+        i32:$src1,
+        i32:$src2
+    );
+    let DAGArgOtherID = (other i32:$other1, i32:$other2);
+    let DAGArgBang = (!cast<SomeType>("Some") i32:$src1, i32:$src2)
+
 .. _TypeNames:
 
 **TypeNames** (``List of Strings``) :versionbadge:`clang-format 17` :ref:`¶ <TypeNames>`
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index 613f1fd168465d..9729634183110c 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -4728,6 +4728,43 @@ struct FormatStyle {
   /// \version 8
   std::vector<std::string> StatementMacros;
 
+  /// Works only when TableGenBreakInsideDAGArgList is true.
+  /// The string list needs to consist of identifiers in TableGen.
+  /// If any identifier is specified, this limits the line breaks by
+  /// TableGenBreakInsideDAGArgList option only on DAGArg values beginning with
+  /// the specified identifiers.
+  ///
+  /// For example the configuration,
+  /// \code
+  ///   TableGenBreakInsideDAGArgList: true
+  ///   TableGenBreakingDAGArgOperators: ['ins', 'outs']
+  /// \endcode
+  ///
+  /// makes the line break only occurs inside DAGArgs beginning with the
+  /// specified identifiers 'ins' and 'outs'.
+  ///
+  /// \code
+  ///   let DAGArgIns = (ins
+  ///       i32:$src1,
+  ///       i32:$src2
+  ///   );
+  ///   let DAGArgOtherID = (other i32:$other1, i32:$other2);
+  ///   let DAGArgBang = (!cast<SomeType>("Some") i32:$src1, i32:$src2)
+  /// \endcode
+  /// \version 19
+  std::vector<std::string> TableGenBreakingDAGArgOperators;
+
+  /// Insert the line break for each element of DAGArg list in TableGen.
+  ///
+  /// \code
+  ///   let DAGArgIns = (ins
+  ///       i32:$src1,
+  ///       i32:$src2
+  ///   );
+  /// \endcode
+  /// \version 19
+  bool TableGenBreakInsideDAGArgList;
+
   /// The number of columns used for tab stops.
   /// \version 3.7
   unsigned TabWidth;
@@ -4980,9 +5017,12 @@ struct FormatStyle {
            SpacesInSquareBrackets == R.SpacesInSquareBrackets &&
            Standard == R.Standard &&
            StatementAttributeLikeMacros == R.StatementAttributeLikeMacros &&
-           StatementMacros == R.StatementMacros && TabWidth == R.TabWidth &&
-           TypeNames == R.TypeNames && TypenameMacros == R.TypenameMacros &&
-           UseTab == R.UseTab &&
+           StatementMacros == R.StatementMacros &&
+           TableGenBreakingDAGArgOperators ==
+               R.TableGenBreakingDAGArgOperators &&
+           TableGenBreakInsideDAGArgList == R.TableGenBreakInsideDAGArgList &&
+           TabWidth == R.TabWidth && TypeNames == R.TypeNames &&
+           TypenameMacros == R.TypenameMacros && UseTab == R.UseTab &&
            VerilogBreakBetweenInstancePorts ==
                R.VerilogBreakBetweenInstancePorts &&
            WhitespaceSensitiveMacros == R.WhitespaceSensitiveMacros;
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index df44e6994c4784..627ff7980753c2 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -1705,7 +1705,8 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State,
         (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign ||
          PrecedenceLevel != prec::Comma || Current.NestingLevel == 0) &&
         (!Style.isTableGen() ||
-         (Previous && Previous->is(TT_TableGenDAGArgListComma)))) {
+         (Previous && Previous->isOneOf(TT_TableGenDAGArgListComma,
+                                        TT_TableGenDAGArgListCommaToBreak)))) {
       NewParenState.Indent = std::max(
           std::max(State.Column, NewParenState.Indent), CurrentState.LastSpace);
     }
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index e64ba7eebc1ce8..50ae4563340c3e 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -1120,6 +1120,10 @@ template <> struct MappingTraits<FormatStyle> {
     IO.mapOptional("StatementAttributeLikeMacros",
                    Style.StatementAttributeLikeMacros);
     IO.mapOptional("StatementMacros", Style.StatementMacros);
+    IO.mapOptional("TableGenBreakingDAGArgOperators",
+                   Style.TableGenBreakingDAGArgOperators);
+    IO.mapOptional("TableGenBreakInsideDAGArgList",
+                   Style.TableGenBreakInsideDAGArgList);
     IO.mapOptional("TabWidth", Style.TabWidth);
     IO.mapOptional("TypeNames", Style.TypeNames);
     IO.mapOptional("TypenameMacros", Style.TypenameMacros);
@@ -1580,6 +1584,8 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {
   LLVMStyle.StatementAttributeLikeMacros.push_back("Q_EMIT");
   LLVMStyle.StatementMacros.push_back("Q_UNUSED");
   LLVMStyle.StatementMacros.push_back("QT_REQUIRE_VERSION");
+  LLVMStyle.TableGenBreakingDAGArgOperators = {};
+  LLVMStyle.TableGenBreakInsideDAGArgList = false;
   LLVMStyle.TabWidth = 8;
   LLVMStyle.UseTab = FormatStyle::UT_Never;
   LLVMStyle.VerilogBreakBetweenInstancePorts = true;
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index 0c1dce7a294082..a291a7c50d3a70 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -155,7 +155,9 @@ namespace format {
   TYPE(TableGenDAGArgCloser)                                                   \
   TYPE(TableGenDAGArgListColon)                                                \
   TYPE(TableGenDAGArgListComma)                                                \
+  TYPE(TableGenDAGArgListCommaToBreak)                                         \
   TYPE(TableGenDAGArgOpener)                                                   \
+  TYPE(TableGenDAGArgOperatorIDToBreak)                                        \
   TYPE(TableGenListCloser)                                                     \
   TYPE(TableGenListOpener)                                                     \
   TYPE(TableGenMultiLineString)                                                \
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index a60d6ae197a24e..605ee95cffb21c 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -990,16 +990,47 @@ class AnnotatingParser {
     return false;
   }
 
+  // Judge if the token is a operator ID to insert line break in DAGArg.
+  // That is, TableGenBreakingDAGArgOperators is empty (by the definition of the
+  // option) or the token is in the list.
+  bool isTableGenDAGArgBreakingOperator(const FormatToken &Tok) {
+    auto &Opes = Style.TableGenBreakingDAGArgOperators;
+    // If the list is empty, all operators are breaking operators.
+    if (Opes.empty())
+      return true;
+    // Otherwise, the operator is limited to normal identifiers.
+    if (Tok.isNot(tok::identifier) ||
+        Tok.isOneOf(TT_TableGenBangOperator, TT_TableGenCondOperator)) {
+      return false;
+    }
+    // The case next is colon, it is not a operator of identifier.
+    if (!Tok.Next || Tok.Next->is(tok::colon))
+      return false;
+    return std::find(Opes.begin(), Opes.end(), Tok.TokenText.str()) !=
+           Opes.end();
+  }
+
   // SimpleValue6 ::=  "(" DagArg [DagArgList] ")"
   // This parses SimpleValue 6's inside part of "(" ")"
   bool parseTableGenDAGArgAndList(FormatToken *Opener) {
+    FormatToken *FirstTok = CurrentToken;
     if (!parseTableGenDAGArg())
       return false;
+    bool BreakInside = false;
+    // Specialized detection for DAGArgOperatorID, a single identifier DAGArg
+    // operator that leads the line break for this DAGArg elements.
+    if (Style.TableGenBreakInsideDAGArgList &&
+        isTableGenDAGArgBreakingOperator(*FirstTok)) {
+      // Special case for identifier DAGArg operator.
+      BreakInside = true;
+      FirstTok->setType(TT_TableGenDAGArgOperatorIDToBreak);
+    }
     // Parse the [DagArgList] part
     bool FirstDAGArgListElm = true;
     while (CurrentToken) {
       if (!FirstDAGArgListElm && CurrentToken->is(tok::comma)) {
-        CurrentToken->setType(TT_TableGenDAGArgListComma);
+        CurrentToken->setType(BreakInside ? TT_TableGenDAGArgListCommaToBreak
+                                          : TT_TableGenDAGArgListComma);
         skipToNextNonComment();
       }
       if (CurrentToken && CurrentToken->is(tok::r_paren)) {
@@ -5085,6 +5116,10 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
     }
     if (Right.is(TT_TableGenCondOperatorColon))
       return false;
+    if (Left.is(TT_TableGenDAGArgOperatorIDToBreak) &&
+        Right.isNot(TT_TableGenDAGArgCloser)) {
+      return true;
+    }
     // Do not insert bang operators and consequent openers.
     if (Right.isOneOf(tok::l_paren, tok::less) &&
         Left.isOneOf(TT_TableGenBangOperator, TT_TableGenCondOperator)) {
@@ -5461,6 +5496,18 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
     //       case2:0);
     if (Left.is(TT_TableGenCondOperatorComma))
       return true;
+    if (Left.is(TT_TableGenDAGArgOperatorIDToBreak) &&
+        Right.isNot(TT_TableGenDAGArgCloser)) {
+      return true;
+    }
+    if (Left.is(TT_TableGenDAGArgListCommaToBreak))
+      return true;
+    if (Right.is(TT_TableGenDAGArgCloser) && Right.MatchingParen &&
+        Right.MatchingParen->Next->is(TT_TableGenDAGArgOperatorIDToBreak) &&
+        &Left != Right.MatchingParen->Next) {
+      // Check to avoid empty DAGArg such as (ins).
+      return Style.TableGenBreakInsideDAGArgList;
+    }
   }
 
   if (Line.startsWith(tok::kw_asm) && Right.is(TT_InlineASMColon) &&
diff --git a/clang/unittests/Format/FormatTestTableGen.cpp b/clang/unittests/Format/FormatTestTableGen.cpp
index 76b871e2e1a522..dd615ec1046576 100644
--- a/clang/unittests/Format/FormatTestTableGen.cpp
+++ b/clang/unittests/Format/FormatTestTableGen.cpp
@@ -332,6 +332,48 @@ TEST_F(FormatTestTableGen, Assert) {
   verifyFormat("assert !le(DefVar1, 0), \"Assert1\";\n");
 }
 
+TEST_F(FormatTestTableGen, DAGArgBreakInside) {
+  FormatStyle Style = getGoogleStyle(FormatStyle::LK_TableGen);
+  Style.ColumnLimit = 60;
+  // By default, the DAGArg does not have a break inside.
+  verifyFormat("def Def : Parent {\n"
+               "  let dagarg = (ins a:$src1, aa:$src2, aaa:$src3)\n"
+               "}\n",
+               Style);
+  // This option forces to break inside the DAGArg.
+  Style.TableGenBreakInsideDAGArgList = true;
+  verifyFormat("def Def : Parent {\n"
+               "  let dagarg = (ins\n"
+               "      a:$src1,\n"
+               "      aa:$src2,\n"
+               "      aaa:$src3\n"
+               "  )\n"
+               "}\n",
+               Style);
+  verifyFormat("def Def : Parent {\n"
+               "  let dagarg = (other\n"
+               "      a:$src1,\n"
+               "      aa:$src2,\n"
+               "      aaa:$src3\n"
+               "  )\n"
+               "}\n",
+               Style);
+  // Then, limit the DAGArg operator only to "ins".
+  Style.TableGenBreakingDAGArgOperators = {"ins"};
+  verifyFormat("def Def : Parent {\n"
+               "  let dagarg = (ins\n"
+               "      a:$src1,\n"
+               "      aa:$src2,\n"
+               "      aaa:$src3\n"
+               "  )\n"
+               "}\n",
+               Style);
+  verifyFormat("def Def : Parent {\n"
+               "  let dagarg = (other a:$src1, aa:$src2, aaa:$src3)\n"
+               "}\n",
+               Style);
+}
+
 TEST_F(FormatTestTableGen, CondOperatorAlignment) {
   FormatStyle Style = getGoogleStyle(FormatStyle::LK_TableGen);
   Style.ColumnLimit = 60;
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index c736dac8dabf21..be4fad753daf75 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -2332,6 +2332,47 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {
   EXPECT_TOKEN(Tokens[4], tok::less, TT_TemplateOpener);
   EXPECT_TOKEN(Tokens[6], tok::greater, TT_TemplateCloser);
   EXPECT_TOKEN(Tokens[7], tok::l_brace, TT_FunctionLBrace);
+
+  // DAGArg breaking options. They use different token types depending on what
+  // is specified.
+  Style.TableGenBreakInsideDAGArgList = true;
+
+  // Using only TableGenBreakInsideDAGArgList makes all the DAGArg to have line
+  // break.
+  Tokens = AnnotateValue("(ins type1:$src1, type2:$src2)");
+  ASSERT_EQ(Tokens.size(), 10u) << Tokens;
+  EXPECT_TOKEN(Tokens[0], tok::l_paren, TT_TableGenDAGArgOpener);
+  EXPECT_TOKEN(Tokens[1], tok::identifier,
+               TT_TableGenDAGArgOperatorIDToBreak); // ins
+  EXPECT_TOKEN(Tokens[5], tok::comma, TT_TableGenDAGArgListCommaToBreak);
+  EXPECT_TOKEN(Tokens[9], tok::r_paren, TT_TableGenDAGArgCloser);
+
+  Tokens = AnnotateValue("(other type1:$src1, type2:$src2)");
+  ASSERT_EQ(Tokens.size(), 10u) << Tokens;
+  EXPECT_TOKEN(Tokens[0], tok::l_paren, TT_TableGenDAGArgOpener);
+  EXPECT_TOKEN(Tokens[1], tok::identifier,
+               TT_TableGenDAGArgOperatorIDToBreak); // other
+  EXPECT_TOKEN(Tokens[5], tok::comma, TT_TableGenDAGArgListCommaToBreak);
+  EXPECT_TOKEN(Tokens[9], tok::r_paren, TT_TableGenDAGArgCloser);
+
+  // If TableGenBreakingDAGArgOperators is specified, it is limited to the
+  // specified operators.
+  Style.TableGenBreakingDAGArgOperators = {"ins", "outs"};
+  Tokens = AnnotateValue("(ins type1:$src1, type2:$src2)");
+  ASSERT_EQ(Tokens.size(), 10u) << Tokens;
+  EXPECT_TOKEN(Tokens[0], tok::l_paren, TT_TableGenDAGArgOpener);
+  EXPECT_TOKEN(Tokens[1], tok::identifier,
+               TT_TableGenDAGArgOperatorIDToBreak); // ins
+  EXPECT_TOKEN(Tokens[5], tok::comma, TT_TableGenDAGArgListCommaToBreak);
+  EXPECT_TOKEN(Tokens[9], tok::r_paren, TT_TableGenDAGArgCloser);
+
+  Tokens = AnnotateValue("(other type1:$src1, type2:$src2)");
+  ASSERT_EQ(Tokens.size(), 10u) << Tokens;
+  EXPECT_TOKEN(Tokens[0], tok::l_paren, TT_TableGenDAGArgOpener);
+  EXPECT_TOKEN(Tokens[1], tok::identifier,
+               TT_Unknown); // other
+  EXPECT_TOKEN(Tokens[5], tok::comma, TT_TableGenDAGArgListComma);
+  EXPECT_TOKEN(Tokens[9], tok::r_paren, TT_TableGenDAGArgCloser);
 }
 
 TEST_F(TokenAnnotatorTest, UnderstandConstructors) {

>From 9475ae3e891ac13fef32f1a72fb4ac213239b125 Mon Sep 17 00:00:00 2001
From: hnakamura5 <k.nakamura.hirofumi at gmail.com>
Date: Mon, 11 Mar 2024 12:52:23 +0900
Subject: [PATCH 2/2] Brushed up the option

---
 clang/docs/ClangFormatStyleOptions.rst        | 42 ++++++++++++----
 clang/include/clang/Format/Format.h           | 41 +++++++++++-----
 clang/lib/Format/ContinuationIndenter.cpp     | 18 ++++++-
 clang/lib/Format/Format.cpp                   | 14 ++++--
 clang/lib/Format/FormatToken.h                |  4 +-
 clang/lib/Format/TokenAnnotator.cpp           | 37 +++++++++-----
 clang/unittests/Format/FormatTestTableGen.cpp | 48 ++++++++++++++++---
 clang/unittests/Format/TokenAnnotatorTest.cpp | 48 +++++++++++++++----
 8 files changed, 199 insertions(+), 53 deletions(-)

diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst
index 9b055d16b24ac9..36e459f5baf61c 100644
--- a/clang/docs/ClangFormatStyleOptions.rst
+++ b/clang/docs/ClangFormatStyleOptions.rst
@@ -6158,33 +6158,55 @@ the configuration (without a prefix: ``Auto``).
 **TabWidth** (``Unsigned``) :versionbadge:`clang-format 3.7` :ref:`¶ <TabWidth>`
   The number of columns used for tab stops.
 
-.. _TableGenBreakInsideDAGArgList:
+.. _TableGenBreakInsideDAGArg:
 
-**TableGenBreakInsideDAGArgList** (``Boolean``) :versionbadge:`clang-format 19` :ref:`¶ <TableGenBreakInsideDAGArgList>`
+**TableGenBreakInsideDAGArg** (``DAGArgStyle``) :versionbadge:`clang-format 19` :ref:`¶ <TableGenBreakInsideDAGArg>`
   Insert the line break for each element of DAGArg list in TableGen.
 
+  Possible values:
+
+  * ``DAS_DontBreak`` (in configuration: ``DontBreak``)
+    Never break inside DAGArg.
+
+    .. code-block:: c++
+
+      let DAGArgIns = (ins i32:$src1, i32:$src2);
+
+  * ``DAS_BreakElements`` (in configuration: ``BreakElements``)
+    Break inside DAGArg after each list element but for the last.
+    This aligns to the first element.
+
+    .. code-block:: c++
+
+      let DAGArgIns = (ins i32:$src1,
+                           i32:$src2);
+
+  * ``DAS_BreakAll`` (in configuration: ``BreakAll``)
+    Break inside DAGArg after the operator and the all elements.
+
+    .. code-block:: c++
+
+      let DAGArgIns = (ins
+          i32:$src1,
+          i32:$src2
+      );
 
-  .. code-block:: c++
 
-    let DAGArgIns = (ins
-        i32:$src1,
-        i32:$src2
-    );
 
 .. _TableGenBreakingDAGArgOperators:
 
 **TableGenBreakingDAGArgOperators** (``List of Strings``) :versionbadge:`clang-format 19` :ref:`¶ <TableGenBreakingDAGArgOperators>`
-  Works only when TableGenBreakInsideDAGArgList is true.
+  Works only when TableGenBreakInsideDAGArg is true.
   The string list needs to consist of identifiers in TableGen.
   If any identifier is specified, this limits the line breaks by
-  TableGenBreakInsideDAGArgList option only on DAGArg values beginning with
+  TableGenBreakInsideDAGArg option only on DAGArg values beginning with
   the specified identifiers.
 
   For example the configuration,
 
   .. code-block:: c++
 
-    TableGenBreakInsideDAGArgList: true
+    TableGenBreakInsideDAGArg: true
     TableGenBreakingDAGArgOperators: ['ins', 'outs']
 
   makes the line break only occurs inside DAGArgs beginning with the
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index 9729634183110c..7ab6c1caf6b0d7 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -4728,15 +4728,15 @@ struct FormatStyle {
   /// \version 8
   std::vector<std::string> StatementMacros;
 
-  /// Works only when TableGenBreakInsideDAGArgList is true.
+  /// Works only when TableGenBreakInsideDAGArg is true.
   /// The string list needs to consist of identifiers in TableGen.
   /// If any identifier is specified, this limits the line breaks by
-  /// TableGenBreakInsideDAGArgList option only on DAGArg values beginning with
+  /// TableGenBreakInsideDAGArg option only on DAGArg values beginning with
   /// the specified identifiers.
   ///
   /// For example the configuration,
   /// \code
-  ///   TableGenBreakInsideDAGArgList: true
+  ///   TableGenBreakInsideDAGArg: true
   ///   TableGenBreakingDAGArgOperators: ['ins', 'outs']
   /// \endcode
   ///
@@ -4754,16 +4754,33 @@ struct FormatStyle {
   /// \version 19
   std::vector<std::string> TableGenBreakingDAGArgOperators;
 
+  /// Different ways to control the format inside TableGen DAGArg.
+  enum DAGArgStyle : int8_t {
+    /// Never break inside DAGArg.
+    /// \code
+    ///   let DAGArgIns = (ins i32:$src1, i32:$src2);
+    /// \endcode
+    DAS_DontBreak,
+    /// Break inside DAGArg after each list element but for the last.
+    /// This aligns to the first element.
+    /// \code
+    ///   let DAGArgIns = (ins i32:$src1,
+    ///                        i32:$src2);
+    /// \endcode
+    DAS_BreakElements,
+    /// Break inside DAGArg after the operator and the all elements.
+    /// \code
+    ///   let DAGArgIns = (ins
+    ///       i32:$src1,
+    ///       i32:$src2
+    ///   );
+    /// \endcode
+    DAS_BreakAll,
+  };
+
   /// Insert the line break for each element of DAGArg list in TableGen.
-  ///
-  /// \code
-  ///   let DAGArgIns = (ins
-  ///       i32:$src1,
-  ///       i32:$src2
-  ///   );
-  /// \endcode
   /// \version 19
-  bool TableGenBreakInsideDAGArgList;
+  DAGArgStyle TableGenBreakInsideDAGArg;
 
   /// The number of columns used for tab stops.
   /// \version 3.7
@@ -5020,7 +5037,7 @@ struct FormatStyle {
            StatementMacros == R.StatementMacros &&
            TableGenBreakingDAGArgOperators ==
                R.TableGenBreakingDAGArgOperators &&
-           TableGenBreakInsideDAGArgList == R.TableGenBreakInsideDAGArgList &&
+           TableGenBreakInsideDAGArg == R.TableGenBreakInsideDAGArg &&
            TabWidth == R.TabWidth && TypeNames == R.TypeNames &&
            TypenameMacros == R.TypenameMacros && UseTab == R.UseTab &&
            VerilogBreakBetweenInstancePorts ==
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index 627ff7980753c2..b59db356dde9af 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -822,6 +822,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
       !CurrentState.IsCSharpGenericTypeConstraint && Previous.opensScope() &&
       Previous.isNot(TT_ObjCMethodExpr) && Previous.isNot(TT_RequiresClause) &&
       Previous.isNot(TT_TableGenDAGArgOpener) &&
+      Previous.isNot(TT_TableGenDAGArgOpenerToBreak) &&
       !(Current.MacroParent && Previous.MacroParent) &&
       (Current.isNot(TT_LineComment) ||
        Previous.isOneOf(BK_BracedInit, TT_VerilogMultiLineListLParen))) {
@@ -1444,7 +1445,9 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) {
       Style.BreakInheritanceList == FormatStyle::BILS_AfterColon) {
     return CurrentState.Indent;
   }
-  if (Previous.is(tok::r_paren) && !Current.isBinaryOperator() &&
+  if (Previous.is(tok::r_paren) &&
+      Previous.isNot(TT_TableGenDAGArgOperatorToBreak) &&
+      !Current.isBinaryOperator() &&
       !Current.isOneOf(tok::colon, tok::comment)) {
     return ContinuationIndent;
   }
@@ -1843,6 +1846,19 @@ void ContinuationIndenter::moveStatePastScopeOpener(LineState &State,
         Style.ContinuationIndentWidth +
         std::max(CurrentState.LastSpace, CurrentState.StartOfFunctionCall);
 
+    if (Style.isTableGen()) {
+      if (Current.is(TT_TableGenDAGArgOpenerToBreak) &&
+          Style.TableGenBreakInsideDAGArg == FormatStyle::DAS_BreakElements) {
+        // For the case the next token is a TableGen DAGArg operator identifier
+        // that is not marked to have a line break after it.
+        // In this case the option DAS_BreakElements requires to align the
+        // DAGArg elements to the operator.
+        const FormatToken *Next = Current.Next;
+        if (Next && Next->is(TT_TableGenDAGArgOperatorID))
+          NewIndent = State.Column + Next->TokenText.size() + 2;
+      }
+    }
+
     // Ensure that different different brackets force relative alignment, e.g.:
     // void SomeFunction(vector<  // break
     //                       int> v);
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 50ae4563340c3e..fdf276122fe736 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -307,6 +307,14 @@ struct ScalarEnumerationTraits<FormatStyle::BreakTemplateDeclarationsStyle> {
   }
 };
 
+template <> struct ScalarEnumerationTraits<FormatStyle::DAGArgStyle> {
+  static void enumeration(IO &IO, FormatStyle::DAGArgStyle &Value) {
+    IO.enumCase(Value, "DontBreak", FormatStyle::DAS_DontBreak);
+    IO.enumCase(Value, "BreakElements", FormatStyle::DAS_BreakElements);
+    IO.enumCase(Value, "BreakAll", FormatStyle::DAS_BreakAll);
+  }
+};
+
 template <>
 struct ScalarEnumerationTraits<FormatStyle::DefinitionReturnTypeBreakingStyle> {
   static void
@@ -1122,8 +1130,8 @@ template <> struct MappingTraits<FormatStyle> {
     IO.mapOptional("StatementMacros", Style.StatementMacros);
     IO.mapOptional("TableGenBreakingDAGArgOperators",
                    Style.TableGenBreakingDAGArgOperators);
-    IO.mapOptional("TableGenBreakInsideDAGArgList",
-                   Style.TableGenBreakInsideDAGArgList);
+    IO.mapOptional("TableGenBreakInsideDAGArg",
+                   Style.TableGenBreakInsideDAGArg);
     IO.mapOptional("TabWidth", Style.TabWidth);
     IO.mapOptional("TypeNames", Style.TypeNames);
     IO.mapOptional("TypenameMacros", Style.TypenameMacros);
@@ -1585,7 +1593,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {
   LLVMStyle.StatementMacros.push_back("Q_UNUSED");
   LLVMStyle.StatementMacros.push_back("QT_REQUIRE_VERSION");
   LLVMStyle.TableGenBreakingDAGArgOperators = {};
-  LLVMStyle.TableGenBreakInsideDAGArgList = false;
+  LLVMStyle.TableGenBreakInsideDAGArg = FormatStyle::DAS_DontBreak;
   LLVMStyle.TabWidth = 8;
   LLVMStyle.UseTab = FormatStyle::UT_Never;
   LLVMStyle.VerilogBreakBetweenInstancePorts = true;
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index a291a7c50d3a70..ecf850a3c75e81 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -157,7 +157,9 @@ namespace format {
   TYPE(TableGenDAGArgListComma)                                                \
   TYPE(TableGenDAGArgListCommaToBreak)                                         \
   TYPE(TableGenDAGArgOpener)                                                   \
-  TYPE(TableGenDAGArgOperatorIDToBreak)                                        \
+  TYPE(TableGenDAGArgOpenerToBreak)                                            \
+  TYPE(TableGenDAGArgOperatorID)                                               \
+  TYPE(TableGenDAGArgOperatorToBreak)                                          \
   TYPE(TableGenListCloser)                                                     \
   TYPE(TableGenListOpener)                                                     \
   TYPE(TableGenMultiLineString)                                                \
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 605ee95cffb21c..9f799d8320a712 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1017,13 +1017,25 @@ class AnnotatingParser {
     if (!parseTableGenDAGArg())
       return false;
     bool BreakInside = false;
-    // Specialized detection for DAGArgOperatorID, a single identifier DAGArg
-    // operator that leads the line break for this DAGArg elements.
-    if (Style.TableGenBreakInsideDAGArgList &&
-        isTableGenDAGArgBreakingOperator(*FirstTok)) {
-      // Special case for identifier DAGArg operator.
-      BreakInside = true;
-      FirstTok->setType(TT_TableGenDAGArgOperatorIDToBreak);
+    if (Style.TableGenBreakInsideDAGArg != FormatStyle::DAS_DontBreak) {
+      // Specialized detection for DAGArgOperator, that determines the way of
+      // line break for this DAGArg elements.
+      if (isTableGenDAGArgBreakingOperator(*FirstTok)) {
+        // Special case for identifier DAGArg operator.
+        BreakInside = true;
+        Opener->setType(TT_TableGenDAGArgOpenerToBreak);
+        if (FirstTok->isOneOf(TT_TableGenBangOperator,
+                              TT_TableGenCondOperator)) {
+          // Special case for bang/cond operators. Set the whole operator as
+          // the DAGArg operator. Always break after it.
+          CurrentToken->Previous->setType(TT_TableGenDAGArgOperatorToBreak);
+        } else if (FirstTok->is(tok::identifier)) {
+          if (Style.TableGenBreakInsideDAGArg == FormatStyle::DAS_BreakAll)
+            FirstTok->setType(TT_TableGenDAGArgOperatorToBreak);
+          else
+            FirstTok->setType(TT_TableGenDAGArgOperatorID);
+        }
+      }
     }
     // Parse the [DagArgList] part
     bool FirstDAGArgListElm = true;
@@ -5116,7 +5128,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
     }
     if (Right.is(TT_TableGenCondOperatorColon))
       return false;
-    if (Left.is(TT_TableGenDAGArgOperatorIDToBreak) &&
+    if (Left.isOneOf(TT_TableGenDAGArgOperatorID,
+                     TT_TableGenDAGArgOperatorToBreak) &&
         Right.isNot(TT_TableGenDAGArgCloser)) {
       return true;
     }
@@ -5496,17 +5509,17 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
     //       case2:0);
     if (Left.is(TT_TableGenCondOperatorComma))
       return true;
-    if (Left.is(TT_TableGenDAGArgOperatorIDToBreak) &&
+    if (Left.is(TT_TableGenDAGArgOperatorToBreak) &&
         Right.isNot(TT_TableGenDAGArgCloser)) {
       return true;
     }
     if (Left.is(TT_TableGenDAGArgListCommaToBreak))
       return true;
     if (Right.is(TT_TableGenDAGArgCloser) && Right.MatchingParen &&
-        Right.MatchingParen->Next->is(TT_TableGenDAGArgOperatorIDToBreak) &&
+        Right.MatchingParen->is(TT_TableGenDAGArgOpenerToBreak) &&
         &Left != Right.MatchingParen->Next) {
       // Check to avoid empty DAGArg such as (ins).
-      return Style.TableGenBreakInsideDAGArgList;
+      return Style.TableGenBreakInsideDAGArg == FormatStyle::DAS_BreakAll;
     }
   }
 
@@ -5922,6 +5935,8 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
     // Avoid to break around paste operator.
     if (Left.is(tok::hash) || Right.is(tok::hash))
       return false;
+    if (Left.isOneOf(TT_TableGenBangOperator, TT_TableGenCondOperator))
+      return false;
   }
 
   if (Left.is(tok::at))
diff --git a/clang/unittests/Format/FormatTestTableGen.cpp b/clang/unittests/Format/FormatTestTableGen.cpp
index dd615ec1046576..4e0699dff86486 100644
--- a/clang/unittests/Format/FormatTestTableGen.cpp
+++ b/clang/unittests/Format/FormatTestTableGen.cpp
@@ -332,7 +332,7 @@ TEST_F(FormatTestTableGen, Assert) {
   verifyFormat("assert !le(DefVar1, 0), \"Assert1\";\n");
 }
 
-TEST_F(FormatTestTableGen, DAGArgBreakInside) {
+TEST_F(FormatTestTableGen, DAGArgBreakElements) {
   FormatStyle Style = getGoogleStyle(FormatStyle::LK_TableGen);
   Style.ColumnLimit = 60;
   // By default, the DAGArg does not have a break inside.
@@ -341,13 +341,49 @@ TEST_F(FormatTestTableGen, DAGArgBreakInside) {
                "}\n",
                Style);
   // This option forces to break inside the DAGArg.
-  Style.TableGenBreakInsideDAGArgList = true;
+  Style.TableGenBreakInsideDAGArg = FormatStyle::DAS_BreakElements;
+  verifyFormat("def Def : Parent {\n"
+               "  let dagarg = (ins a:$src1,\n"
+               "                    aa:$src2,\n"
+               "                    aaa:$src3);\n"
+               "}\n",
+               Style);
+  verifyFormat("def Def : Parent {\n"
+               "  let dagarg = (other a:$src1,\n"
+               "                      aa:$src2,\n"
+               "                      aaa:$src3);\n"
+               "}\n",
+               Style);
+  // Then, limit the DAGArg operator only to "ins".
+  Style.TableGenBreakingDAGArgOperators = {"ins"};
+  verifyFormat("def Def : Parent {\n"
+               "  let dagarg = (ins a:$src1,\n"
+               "                    aa:$src2,\n"
+               "                    aaa:$src3);\n"
+               "}\n",
+               Style);
+  verifyFormat("def Def : Parent {\n"
+               "  let dagarg = (other a:$src1, aa:$src2, aaa:$src3)\n"
+               "}\n",
+               Style);
+}
+
+TEST_F(FormatTestTableGen, DAGArgBreakAll) {
+  FormatStyle Style = getGoogleStyle(FormatStyle::LK_TableGen);
+  Style.ColumnLimit = 60;
+  // By default, the DAGArg does not have a break inside.
+  verifyFormat("def Def : Parent {\n"
+               "  let dagarg = (ins a:$src1, aa:$src2, aaa:$src3)\n"
+               "}\n",
+               Style);
+  // This option forces to break inside the DAGArg.
+  Style.TableGenBreakInsideDAGArg = FormatStyle::DAS_BreakAll;
   verifyFormat("def Def : Parent {\n"
                "  let dagarg = (ins\n"
                "      a:$src1,\n"
                "      aa:$src2,\n"
                "      aaa:$src3\n"
-               "  )\n"
+               "  );\n"
                "}\n",
                Style);
   verifyFormat("def Def : Parent {\n"
@@ -355,7 +391,7 @@ TEST_F(FormatTestTableGen, DAGArgBreakInside) {
                "      a:$src1,\n"
                "      aa:$src2,\n"
                "      aaa:$src3\n"
-               "  )\n"
+               "  );\n"
                "}\n",
                Style);
   // Then, limit the DAGArg operator only to "ins".
@@ -365,11 +401,11 @@ TEST_F(FormatTestTableGen, DAGArgBreakInside) {
                "      a:$src1,\n"
                "      aa:$src2,\n"
                "      aaa:$src3\n"
-               "  )\n"
+               "  );\n"
                "}\n",
                Style);
   verifyFormat("def Def : Parent {\n"
-               "  let dagarg = (other a:$src1, aa:$src2, aaa:$src3)\n"
+               "  let dagarg = (other a:$src1, aa:$src2, aaa:$src3);\n"
                "}\n",
                Style);
 }
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index be4fad753daf75..27c6980c94f7fb 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -2335,23 +2335,53 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {
 
   // DAGArg breaking options. They use different token types depending on what
   // is specified.
-  Style.TableGenBreakInsideDAGArgList = true;
+  Style.TableGenBreakInsideDAGArg = FormatStyle::DAS_BreakElements;
 
-  // Using only TableGenBreakInsideDAGArgList makes all the DAGArg to have line
-  // break.
+  // When TableGenBreakInsideDAGArg is DAS_BreakElements and
+  // TableGenBreakingDAGArgOperators is not specified, it makes all the DAGArg
+  // elements to have line break.
   Tokens = AnnotateValue("(ins type1:$src1, type2:$src2)");
   ASSERT_EQ(Tokens.size(), 10u) << Tokens;
-  EXPECT_TOKEN(Tokens[0], tok::l_paren, TT_TableGenDAGArgOpener);
+  EXPECT_TOKEN(Tokens[0], tok::l_paren, TT_TableGenDAGArgOpenerToBreak);
   EXPECT_TOKEN(Tokens[1], tok::identifier,
-               TT_TableGenDAGArgOperatorIDToBreak); // ins
+               TT_TableGenDAGArgOperatorID); // ins
   EXPECT_TOKEN(Tokens[5], tok::comma, TT_TableGenDAGArgListCommaToBreak);
   EXPECT_TOKEN(Tokens[9], tok::r_paren, TT_TableGenDAGArgCloser);
 
   Tokens = AnnotateValue("(other type1:$src1, type2:$src2)");
   ASSERT_EQ(Tokens.size(), 10u) << Tokens;
-  EXPECT_TOKEN(Tokens[0], tok::l_paren, TT_TableGenDAGArgOpener);
+  EXPECT_TOKEN(Tokens[0], tok::l_paren, TT_TableGenDAGArgOpenerToBreak);
+  EXPECT_TOKEN(Tokens[1], tok::identifier,
+               TT_TableGenDAGArgOperatorID); // other
+  EXPECT_TOKEN(Tokens[5], tok::comma, TT_TableGenDAGArgListCommaToBreak);
+  EXPECT_TOKEN(Tokens[9], tok::r_paren, TT_TableGenDAGArgCloser);
+
+  // For non-identifier operators, breaks after the operator.
+  Tokens = AnnotateValue("(!cast<Type>(\"Name\") type1:$src1, type2:$src2)");
+  ASSERT_EQ(Tokens.size(), 16u) << Tokens;
+  EXPECT_TOKEN(Tokens[0], tok::l_paren, TT_TableGenDAGArgOpenerToBreak);
+  EXPECT_TOKEN(Tokens[7], tok::r_paren, TT_TableGenDAGArgOperatorToBreak);
+  EXPECT_TOKEN(Tokens[11], tok::comma, TT_TableGenDAGArgListCommaToBreak);
+  EXPECT_TOKEN(Tokens[15], tok::r_paren, TT_TableGenDAGArgCloser);
+
+  Style.TableGenBreakInsideDAGArg = FormatStyle::DAS_BreakAll;
+
+  // When TableGenBreakInsideDAGArg is DAS_BreakAll and
+  // TableGenBreakingDAGArgOperators is not specified, it makes all the DAGArg
+  // to have line break inside it.
+  Tokens = AnnotateValue("(ins type1:$src1, type2:$src2)");
+  ASSERT_EQ(Tokens.size(), 10u) << Tokens;
+  EXPECT_TOKEN(Tokens[0], tok::l_paren, TT_TableGenDAGArgOpenerToBreak);
   EXPECT_TOKEN(Tokens[1], tok::identifier,
-               TT_TableGenDAGArgOperatorIDToBreak); // other
+               TT_TableGenDAGArgOperatorToBreak); // ins
+  EXPECT_TOKEN(Tokens[5], tok::comma, TT_TableGenDAGArgListCommaToBreak);
+  EXPECT_TOKEN(Tokens[9], tok::r_paren, TT_TableGenDAGArgCloser);
+
+  Tokens = AnnotateValue("(other type1:$src1, type2:$src2)");
+  ASSERT_EQ(Tokens.size(), 10u) << Tokens;
+  EXPECT_TOKEN(Tokens[0], tok::l_paren, TT_TableGenDAGArgOpenerToBreak);
+  EXPECT_TOKEN(Tokens[1], tok::identifier,
+               TT_TableGenDAGArgOperatorToBreak); // other
   EXPECT_TOKEN(Tokens[5], tok::comma, TT_TableGenDAGArgListCommaToBreak);
   EXPECT_TOKEN(Tokens[9], tok::r_paren, TT_TableGenDAGArgCloser);
 
@@ -2360,9 +2390,9 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) {
   Style.TableGenBreakingDAGArgOperators = {"ins", "outs"};
   Tokens = AnnotateValue("(ins type1:$src1, type2:$src2)");
   ASSERT_EQ(Tokens.size(), 10u) << Tokens;
-  EXPECT_TOKEN(Tokens[0], tok::l_paren, TT_TableGenDAGArgOpener);
+  EXPECT_TOKEN(Tokens[0], tok::l_paren, TT_TableGenDAGArgOpenerToBreak);
   EXPECT_TOKEN(Tokens[1], tok::identifier,
-               TT_TableGenDAGArgOperatorIDToBreak); // ins
+               TT_TableGenDAGArgOperatorToBreak); // ins
   EXPECT_TOKEN(Tokens[5], tok::comma, TT_TableGenDAGArgListCommaToBreak);
   EXPECT_TOKEN(Tokens[9], tok::r_paren, TT_TableGenDAGArgCloser);
 



More information about the cfe-commits mailing list