[clang] [ClangFormat] Fix indent in child lines within a macro argument. (PR #82523)

via cfe-commits cfe-commits at lists.llvm.org
Wed Feb 21 11:47:36 PST 2024


https://github.com/r4nt created https://github.com/llvm/llvm-project/pull/82523

When reconstructing lines from a macro expansion, make sure that lines at different levels in the expanded code get indented correctly as part of the macro argument.

>From d9b8493f993a4020f3eaf2911b9704e4b25ac6b7 Mon Sep 17 00:00:00 2001
From: Manuel Klimek <klimek at google.com>
Date: Wed, 21 Feb 2024 14:16:54 +0000
Subject: [PATCH] [ClangFormat] Fix indent in child lines within a macro
 argument.

When reconstructing lines from a macro expansion, make sure that
lines at different levels in the expanded code get indented
correctly as part of the macro argument.
---
 clang/lib/Format/MacroCallReconstructor.cpp   |  69 ++++++----
 clang/lib/Format/Macros.h                     |  10 +-
 clang/lib/Format/UnwrappedLineParser.cpp      |   6 +
 clang/lib/Format/UnwrappedLineParser.h        |   2 +
 .../Format/FormatTestMacroExpansion.cpp       |  23 +++-
 .../Format/MacroCallReconstructorTest.cpp     | 129 ++++++++++++------
 6 files changed, 166 insertions(+), 73 deletions(-)

diff --git a/clang/lib/Format/MacroCallReconstructor.cpp b/clang/lib/Format/MacroCallReconstructor.cpp
index cbdd1683c54d1a..5277b406c81b11 100644
--- a/clang/lib/Format/MacroCallReconstructor.cpp
+++ b/clang/lib/Format/MacroCallReconstructor.cpp
@@ -19,6 +19,7 @@
 #include "clang/Basic/TokenKinds.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/Support/Debug.h"
+#include <algorithm>
 #include <cassert>
 
 #define DEBUG_TYPE "format-reconstruct"
@@ -33,7 +34,7 @@ void forEachToken(const UnwrappedLine &Line, const T &Call,
                   FormatToken *Parent = nullptr) {
   bool First = true;
   for (const auto &N : Line.Tokens) {
-    Call(N.Tok, Parent, First);
+    Call(N.Tok, Parent, First, Line.Level);
     First = false;
     for (const auto &Child : N.Children)
       forEachToken(Child, Call, N.Tok);
@@ -44,7 +45,7 @@ MacroCallReconstructor::MacroCallReconstructor(
     unsigned Level,
     const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
         &ActiveExpansions)
-    : Level(Level), IdToReconstructed(ActiveExpansions) {
+    : Result(Level), IdToReconstructed(ActiveExpansions) {
   Result.Tokens.push_back(std::make_unique<LineNode>());
   ActiveReconstructedLines.push_back(&Result);
 }
@@ -52,9 +53,8 @@ MacroCallReconstructor::MacroCallReconstructor(
 void MacroCallReconstructor::addLine(const UnwrappedLine &Line) {
   assert(State != Finalized);
   LLVM_DEBUG(llvm::dbgs() << "MCR: new line...\n");
-  forEachToken(Line, [&](FormatToken *Token, FormatToken *Parent, bool First) {
-    add(Token, Parent, First);
-  });
+  forEachToken(Line, [&](FormatToken *Token, FormatToken *Parent, bool First,
+                         unsigned Level) { add(Token, Parent, First, Level); });
   assert(InProgress || finished());
 }
 
@@ -62,8 +62,8 @@ UnwrappedLine MacroCallReconstructor::takeResult() && {
   finalize();
   assert(Result.Tokens.size() == 1 &&
          Result.Tokens.front()->Children.size() == 1);
-  UnwrappedLine Final =
-      createUnwrappedLine(*Result.Tokens.front()->Children.front(), Level);
+  UnwrappedLine Final = createUnwrappedLine(
+      *Result.Tokens.front()->Children.front(), Result.Level);
   assert(!Final.Tokens.empty());
   return Final;
 }
@@ -72,7 +72,8 @@ UnwrappedLine MacroCallReconstructor::takeResult() && {
 // ExpandedParent in the incoming unwrapped line. \p First specifies whether it
 // is the first token in a given unwrapped line.
 void MacroCallReconstructor::add(FormatToken *Token,
-                                 FormatToken *ExpandedParent, bool First) {
+                                 FormatToken *ExpandedParent, bool First,
+                                 unsigned Level) {
   LLVM_DEBUG(
       llvm::dbgs() << "MCR: Token: " << Token->TokenText << ", Parent: "
                    << (ExpandedParent ? ExpandedParent->TokenText : "<null>")
@@ -102,7 +103,7 @@ void MacroCallReconstructor::add(FormatToken *Token,
       First = true;
   }
 
-  prepareParent(ExpandedParent, First);
+  prepareParent(ExpandedParent, First, Level);
 
   if (Token->MacroCtx) {
     // If this token was generated by a macro call, add the reconstructed
@@ -129,7 +130,7 @@ void MacroCallReconstructor::add(FormatToken *Token,
 // is the parent of ActiveReconstructedLines.back() in the reconstructed
 // unwrapped line.
 void MacroCallReconstructor::prepareParent(FormatToken *ExpandedParent,
-                                           bool NewLine) {
+                                           bool NewLine, unsigned Level) {
   LLVM_DEBUG({
     llvm::dbgs() << "ParentMap:\n";
     debugParentMap();
@@ -172,7 +173,7 @@ void MacroCallReconstructor::prepareParent(FormatToken *ExpandedParent,
     }
     assert(!ActiveReconstructedLines.empty());
     ActiveReconstructedLines.back()->Tokens.back()->Children.push_back(
-        std::make_unique<ReconstructedLine>());
+        std::make_unique<ReconstructedLine>(Level));
     ActiveReconstructedLines.push_back(
         &*ActiveReconstructedLines.back()->Tokens.back()->Children.back());
   } else if (parentLine().Tokens.back()->Tok != Parent) {
@@ -424,7 +425,8 @@ bool MacroCallReconstructor::processNextReconstructed() {
       SpelledParentToReconstructedParent[MacroCallStructure.back()
                                              .ParentLastToken] = Token;
       appendToken(Token);
-      prepareParent(Token, /*NewLine=*/true);
+      prepareParent(Token, /*NewLine=*/true,
+                    MacroCallStructure.back().Line->Level);
       Token->MacroParent = true;
       return false;
     }
@@ -435,7 +437,8 @@ bool MacroCallReconstructor::processNextReconstructed() {
             [MacroCallStructure.back().Line->Tokens.back()->Tok] = Token;
         Token->MacroParent = true;
         appendToken(Token, MacroCallStructure.back().Line);
-        prepareParent(Token, /*NewLine=*/true);
+        prepareParent(Token, /*NewLine=*/true,
+                      MacroCallStructure.back().Line->Level);
         return true;
       }
       if (Token->is(tok::r_paren)) {
@@ -509,16 +512,36 @@ MacroCallReconstructor::createUnwrappedLine(const ReconstructedLine &Line,
   for (const auto &N : Line.Tokens) {
     Result.Tokens.push_back(N->Tok);
     UnwrappedLineNode &Current = Result.Tokens.back();
-    for (const auto &Child : N->Children) {
-      if (Child->Tokens.empty())
-        continue;
-      Current.Children.push_back(createUnwrappedLine(*Child, Level + 1));
-    }
-    if (Current.Children.size() == 1 &&
-        Current.Tok->isOneOf(tok::l_paren, tok::comma)) {
-      Result.Tokens.splice(Result.Tokens.end(),
-                           Current.Children.front().Tokens);
-      Current.Children.clear();
+    auto NumChildren =
+        std::count_if(N->Children.begin(), N->Children.end(),
+                      [](const auto &Child) { return !Child->Tokens.empty(); });
+    if (NumChildren == 1 && Current.Tok->isOneOf(tok::l_paren, tok::comma)) {
+      // If we only have one child, and the child is due to a macro expansion
+      // (either attached to a left parenthesis or comma), merge the child into
+      // the current line to prevent forced breaks for macro arguments.
+      auto *Child = std::find_if(
+          N->Children.begin(), N->Children.end(),
+          [](const auto &Child) { return !Child->Tokens.empty(); });
+      auto Line = createUnwrappedLine(**Child, Level);
+      Result.Tokens.splice(Result.Tokens.end(), Line.Tokens);
+    } else if (NumChildren > 0) {
+      // When there are multiple children with different indent, make sure that
+      // we indent them:
+      // 1. One level below the current line's level.
+      // 2. At the correct level relative to each other.
+      unsigned MinChildLevel =
+          std::min_element(N->Children.begin(), N->Children.end(),
+                           [](const auto &E1, const auto &E2) {
+                             return E1->Level < E2->Level;
+                           })
+              ->get()
+              ->Level;
+      for (const auto &Child : N->Children) {
+        if (Child->Tokens.empty())
+          continue;
+        Current.Children.push_back(createUnwrappedLine(
+            *Child, Level + 1 + (Child->Level - MinChildLevel)));
+      }
     }
   }
   return Result;
diff --git a/clang/lib/Format/Macros.h b/clang/lib/Format/Macros.h
index 1964624e828ce3..d2f7fe502364cd 100644
--- a/clang/lib/Format/Macros.h
+++ b/clang/lib/Format/Macros.h
@@ -231,8 +231,9 @@ class MacroCallReconstructor {
   UnwrappedLine takeResult() &&;
 
 private:
-  void add(FormatToken *Token, FormatToken *ExpandedParent, bool First);
-  void prepareParent(FormatToken *ExpandedParent, bool First);
+  void add(FormatToken *Token, FormatToken *ExpandedParent, bool First,
+           unsigned Level);
+  void prepareParent(FormatToken *ExpandedParent, bool First, unsigned Level);
   FormatToken *getParentInResult(FormatToken *Parent);
   void reconstruct(FormatToken *Token);
   void startReconstruction(FormatToken *Token);
@@ -272,6 +273,8 @@ class MacroCallReconstructor {
   // FIXME: Investigate changing UnwrappedLine to a pointer type and using it
   // instead of rolling our own type.
   struct ReconstructedLine {
+    explicit ReconstructedLine(unsigned Level) : Level(Level) {}
+    unsigned Level;
     llvm::SmallVector<std::unique_ptr<LineNode>> Tokens;
   };
 
@@ -373,9 +376,6 @@ class MacroCallReconstructor {
   // \- )
   llvm::SmallVector<MacroCallState> MacroCallStructure;
 
-  // Level the generated UnwrappedLine will be at.
-  const unsigned Level;
-
   // Maps from identifier of the macro call to an unwrapped line containing
   // all tokens of the macro call.
   const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 8f6453a25d9d41..3a424bdcde793a 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -90,6 +90,12 @@ class ScopedDeclarationState {
 
 } // end anonymous namespace
 
+std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) {
+  llvm::raw_os_ostream OS(Stream);
+  printLine(OS, Line);
+  return Stream;
+}
+
 class ScopedLineState {
 public:
   ScopedLineState(UnwrappedLineParser &Parser,
diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h
index 739298690bbd76..1403533a2d0ef6 100644
--- a/clang/lib/Format/UnwrappedLineParser.h
+++ b/clang/lib/Format/UnwrappedLineParser.h
@@ -420,6 +420,8 @@ struct UnwrappedLineNode {
   SmallVector<UnwrappedLine, 0> Children;
 };
 
+std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line);
+
 } // end namespace format
 } // end namespace clang
 
diff --git a/clang/unittests/Format/FormatTestMacroExpansion.cpp b/clang/unittests/Format/FormatTestMacroExpansion.cpp
index 653ec2a94c64da..ad5d7941bd4485 100644
--- a/clang/unittests/Format/FormatTestMacroExpansion.cpp
+++ b/clang/unittests/Format/FormatTestMacroExpansion.cpp
@@ -48,7 +48,7 @@ TEST_F(FormatTestMacroExpansion, UnexpandConfiguredMacros) {
 )",
                Style);
   verifyIncompleteFormat("ID3({, ID(a *b),\n"
-                         "  ;\n"
+                         "    ;\n"
                          "  });",
                          Style);
 
@@ -131,9 +131,9 @@ ID(CALL(CALL(a * b)));
   EXPECT_EQ(R"(
 ID3(
     {
-    CLASS
-    a *b;
-    };
+      CLASS
+        a *b;
+      };
     },
     ID(x *y);
     ,
@@ -287,6 +287,21 @@ TEST_F(FormatTestMacroExpansion,
                Style);
 }
 
+TEST_F(FormatTestMacroExpansion, IndentChildrenWithinMacroCall) {
+  FormatStyle Style = getGoogleStyleWithColumns(22);
+  Style.Macros.push_back("MACRO(a, b)=a=(b)");
+  verifyFormat("void f() {\n"
+               "  MACRO(a b, call([] {\n"
+               "          if (expr) {\n"
+               "            indent();\n"
+               "          }\n"
+               "        }));\n"
+               "}"
+
+               ,
+               Style);
+}
+
 } // namespace
 } // namespace test
 } // namespace format
diff --git a/clang/unittests/Format/MacroCallReconstructorTest.cpp b/clang/unittests/Format/MacroCallReconstructorTest.cpp
index 6e6900577d165c..9df21eae70cb75 100644
--- a/clang/unittests/Format/MacroCallReconstructorTest.cpp
+++ b/clang/unittests/Format/MacroCallReconstructorTest.cpp
@@ -151,17 +151,21 @@ class MacroCallReconstructorTest : public ::testing::Test {
                                            Lex.Allocator, Lex.IdentTable);
   }
 
-  UnwrappedLine line(llvm::ArrayRef<FormatToken *> Tokens) {
+  UnwrappedLine line(llvm::ArrayRef<FormatToken *> Tokens, unsigned Level = 0) {
     UnwrappedLine Result;
+    Result.Level = Level;
     for (FormatToken *Tok : Tokens)
       Result.Tokens.push_back(UnwrappedLineNode(Tok));
     return Result;
   }
 
-  UnwrappedLine line(llvm::StringRef Text) { return line({lex(Text)}); }
+  UnwrappedLine line(llvm::StringRef Text, unsigned Level = 0) {
+    return line({lex(Text)}, Level);
+  }
 
-  UnwrappedLine line(llvm::ArrayRef<Chunk> Chunks) {
+  UnwrappedLine line(llvm::ArrayRef<Chunk> Chunks, unsigned Level = 0) {
     UnwrappedLine Result;
+    Result.Level = Level;
     for (const Chunk &Chunk : Chunks) {
       Result.Tokens.insert(Result.Tokens.end(), Chunk.Tokens.begin(),
                            Chunk.Tokens.end());
@@ -186,6 +190,8 @@ class MacroCallReconstructorTest : public ::testing::Test {
 };
 
 bool matchesTokens(const UnwrappedLine &L1, const UnwrappedLine &L2) {
+  if (L1.Level != L2.Level)
+    return false;
   if (L1.Tokens.size() != L2.Tokens.size())
     return false;
   for (auto L1It = L1.Tokens.begin(), L2It = L2.Tokens.begin();
@@ -288,7 +294,8 @@ TEST_F(MacroCallReconstructorTest, StatementSequence) {
               matchesLine(line(
                   {U1.consume("SEMI"),
                    children({line({U2.consume("SEMI"),
-                                   children({line(U3.consume("SEMI"))})})})})));
+                                   children({line(U3.consume("SEMI"), 2)})},
+                                  1)})})));
 }
 
 TEST_F(MacroCallReconstructorTest, NestedBlock) {
@@ -337,9 +344,9 @@ TEST_F(MacroCallReconstructorTest, NestedBlock) {
 
   auto Expected = line({Chunk2Start,
                         children({
-                            line(Chunk2LBrace),
-                            line({Chunk1, Chunk2Mid}),
-                            line(Chunk2RBrace),
+                            line(Chunk2LBrace, 1),
+                            line({Chunk1, Chunk2Mid}, 1),
+                            line(Chunk2RBrace, 1),
                         }),
                         Chunk2End});
   EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
@@ -379,9 +386,11 @@ TEST_F(MacroCallReconstructorTest, NestedChildBlocks) {
   Unexp.addLine(
       line({E.consume("f([] {"),
             children({line({E.consume("f([] {"),
-                            children({line(E.consume("return a * b;"))}),
-                            E.consume("})")})}),
-            E.consume("})")}));
+                            children({line(E.consume("return a * b;"), 3)}),
+                            E.consume("})")},
+                           2)}),
+            E.consume("})")},
+           1));
   Unexp.addLine(line(E.consume("}")));
   EXPECT_TRUE(Unexp.finished());
 
@@ -407,13 +416,15 @@ TEST_F(MacroCallReconstructorTest, NestedChildBlocks) {
   auto Expected = line({
       Chunk3Start,
       children({
-          line(Chunk3LBrace),
-          line({
-              Chunk2Start,
-              Chunk1,
-              Chunk2End,
-          }),
-          line(Chunk3RBrace),
+          line(Chunk3LBrace, 1),
+          line(
+              {
+                  Chunk2Start,
+                  Chunk1,
+                  Chunk2End,
+              },
+              2),
+          line(Chunk3RBrace, 1),
       }),
       Chunk3End,
   });
@@ -469,8 +480,8 @@ TEST_F(MacroCallReconstructorTest, MultipleToplevelUnwrappedLines) {
   auto Expected = line({
       U.consume("ID("),
       children({
-          line(U.consume("x;")),
-          line(U.consume("x")),
+          line(U.consume("x;"), 1),
+          line(U.consume("x"), 1),
       }),
       U.consume(", y)"),
   });
@@ -524,9 +535,9 @@ TEST_F(MacroCallReconstructorTest, NestedCallsMultipleLines) {
   auto Expected = line({
       Chunk2Start,
       children({
-          line({Chunk2LBrace}),
-          line({Chunk1, Chunk2Semi}),
-          line({Chunk2RBrace}),
+          line({Chunk2LBrace}, 1),
+          line({Chunk1, Chunk2Semi}, 1),
+          line({Chunk2RBrace}, 1),
       }),
       Chunk2End,
   });
@@ -556,15 +567,17 @@ TEST_F(MacroCallReconstructorTest, ParentOutsideMacroCall) {
   auto Expected = line({
       Prefix,
       children({
-          line({
-              U.consume("ID("),
-              children({
-                  line(U.consume("x;")),
-                  line(U.consume("y;")),
-                  line(U.consume("z;")),
-              }),
-              U.consume(")"),
-          }),
+          line(
+              {
+                  U.consume("ID("),
+                  children({
+                      line(U.consume("x;"), 2),
+                      line(U.consume("y;"), 2),
+                      line(U.consume("z;"), 2),
+                  }),
+                  U.consume(")"),
+              },
+              1),
       }),
       Postfix,
   });
@@ -590,7 +603,7 @@ TEST_F(MacroCallReconstructorTest, ChildrenSplitAcrossArguments) {
   Matcher U(Call, Lex);
   auto Expected = line({
       U.consume("CALL({"),
-      children(line(U.consume("a;"))),
+      children(line(U.consume("a;"), 1)),
       U.consume(", b; })"),
   });
   EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
@@ -620,16 +633,20 @@ TEST_F(MacroCallReconstructorTest, ChildrenAfterMacroCall) {
   Matcher U(Call, Lex);
   auto Expected = line({
       U.consume("CALL({"),
-      children(line(U.consume("a"))),
+      children(line(U.consume("a"), 1)),
       U.consume(", b)"),
       Semi,
-      children(line({
-          SecondLine,
-          children(line({
-              ThirdLine,
-              Postfix,
-          })),
-      })),
+      children(line(
+          {
+              SecondLine,
+              children(line(
+                  {
+                      ThirdLine,
+                      Postfix,
+                  },
+                  2)),
+          },
+          1)),
   });
   EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
 }
@@ -655,7 +672,37 @@ TEST_F(MacroCallReconstructorTest, InvalidCodeSplittingBracesAcrossArgs) {
   Matcher U(Call, Lex);
   auto Expected = line({
       Prefix,
-      children({line(U.consume("M({,x,)"))}),
+      children({line(U.consume("M({,x,)"), 1)}),
+  });
+  EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroCallReconstructorTest, IndentLevelInExpandedCode) {
+  auto Macros = createExpander({"ID(a)=a"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("ID", {std::string("[] { { x; } }")});
+
+  MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens(), Lex);
+  Unexp.addLine(line({
+      E.consume("[] {"),
+      children({
+          line(E.consume("{"), 1),
+          line(E.consume("x;"), 2),
+          line(E.consume("}"), 1),
+      }),
+      E.consume("}"),
+  }));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call, Lex);
+  auto Expected = line({
+      U.consume("ID([] {"),
+      children({
+          line(U.consume("{"), 1),
+          line(U.consume("x;"), 2),
+          line(U.consume("}"), 1),
+      }),
+      U.consume("})"),
   });
   EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
 }



More information about the cfe-commits mailing list