[clang] d6d0dc1 - [clang-format] Add MacroUnexpander.

Manuel Klimek via cfe-commits cfe-commits at lists.llvm.org
Tue Jul 12 00:12:12 PDT 2022


Author: Manuel Klimek
Date: 2022-07-12T07:11:46Z
New Revision: d6d0dc1f45377ddaf5c10a48d64b09308b71501a

URL: https://github.com/llvm/llvm-project/commit/d6d0dc1f45377ddaf5c10a48d64b09308b71501a
DIFF: https://github.com/llvm/llvm-project/commit/d6d0dc1f45377ddaf5c10a48d64b09308b71501a.diff

LOG: [clang-format] Add MacroUnexpander.

MacroUnexpander applies the structural formatting of expanded lines into
UnwrappedLines to the corresponding unexpanded macro calls, resulting in
UnwrappedLines for the macro calls the user typed.

Differential Revision: https://reviews.llvm.org/D88299

Added: 
    clang/lib/Format/MacroCallReconstructor.cpp
    clang/unittests/Format/MacroCallReconstructorTest.cpp

Modified: 
    clang/lib/Format/CMakeLists.txt
    clang/lib/Format/FormatToken.h
    clang/lib/Format/Macros.h
    clang/lib/Format/UnwrappedLineParser.h
    clang/unittests/Format/CMakeLists.txt

Removed: 
    


################################################################################
diff --git a/clang/lib/Format/CMakeLists.txt b/clang/lib/Format/CMakeLists.txt
index ca455157ae44a..4ea02ea72bc77 100644
--- a/clang/lib/Format/CMakeLists.txt
+++ b/clang/lib/Format/CMakeLists.txt
@@ -8,6 +8,7 @@ add_clang_library(clangFormat
   Format.cpp
   FormatToken.cpp
   FormatTokenLexer.cpp
+  MacroCallReconstructor.cpp
   MacroExpander.cpp
   NamespaceEndCommentsFixer.cpp
   QualifierAlignmentFixer.cpp

diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index b6cc021affae3..73e32979853f5 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -497,6 +497,15 @@ struct FormatToken {
   // in a configured macro expansion.
   llvm::Optional<MacroExpansion> MacroCtx;
 
+  /// When macro expansion introduces nodes with children, those are marked as
+  /// \c MacroParent.
+  /// FIXME: The formatting code currently hard-codes the assumption that
+  /// child nodes are introduced by blocks following an opening brace.
+  /// This is deeply baked into the code and disentangling this will require
+  /// significant refactorings. \c MacroParent allows us to special-case the
+  /// cases in which we treat parents as block-openers for now.
+  bool MacroParent = false;
+
   bool is(tok::TokenKind Kind) const { return Tok.is(Kind); }
   bool is(TokenType TT) const { return getType() == TT; }
   bool is(const IdentifierInfo *II) const {

diff --git a/clang/lib/Format/MacroCallReconstructor.cpp b/clang/lib/Format/MacroCallReconstructor.cpp
new file mode 100644
index 0000000000000..67711cc91d0b8
--- /dev/null
+++ b/clang/lib/Format/MacroCallReconstructor.cpp
@@ -0,0 +1,569 @@
+//===--- MacroCallReconstructor.cpp - Format C++ code -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the implementation of MacroCallReconstructor, which fits
+/// a reconstructed macro call to a parsed set of UnwrappedLines.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Macros.h"
+
+#include "UnwrappedLineParser.h"
+#include "clang/Basic/TokenKinds.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Support/Debug.h"
+#include <cassert>
+
+#define DEBUG_TYPE "format-reconstruct"
+
+namespace clang {
+namespace format {
+
+// Invokes \p Call on every token of \p Line, descending into child lines;
+// passes the token, its parent, and whether it is the first token of its line.
+template <typename T>
+void forEachToken(const UnwrappedLine &Line, const T &Call,
+                  FormatToken *Parent = nullptr) {
+  bool IsFirstInLine = true;
+  for (const auto &Node : Line.Tokens) {
+    Call(Node.Tok, Parent, IsFirstInLine);
+    IsFirstInLine = false;
+    // Tokens of nested lines report the current node's token as their parent.
+    for (const auto &ChildLine : Node.Children)
+      forEachToken(ChildLine, Call, Node.Tok);
+  }
+}
+
+MacroCallReconstructor::MacroCallReconstructor(
+    unsigned Level,
+    const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
+        &ActiveExpansions)
+    : Level(Level), IdToReconstructed(ActiveExpansions) {
+  Result.Tokens.push_back(std::make_unique<LineNode>()); // Root node.
+  ActiveReconstructedLines.push_back(&Result); // Bottom of the line stack.
+}
+
+void MacroCallReconstructor::addLine(const UnwrappedLine &Line) {
+  assert(State != Finalized);
+  LLVM_DEBUG(llvm::dbgs() << "MCR: new line...\n");
+  forEachToken(Line,
+               [&](FormatToken *T, FormatToken *P, bool F) { add(T, P, F); });
+  // Test State itself; a bare InProgress is a constant, not a state check.
+  assert(State == InProgress || finished());
+}
+
+UnwrappedLine MacroCallReconstructor::takeResult() && {
+  finalize(); // Folds all top-level lines into one.
+  const auto &TopLines = Result.Tokens.front()->Children;
+  assert(Result.Tokens.size() == 1 && TopLines.size() == 1);
+  UnwrappedLine Final = createUnwrappedLine(*TopLines.front(), Level);
+  assert(!Final.Tokens.empty());
+  return Final;
+}
+
+// Reconstruct the position of the next \p Token, given its parent \p
+// ExpandedParent in the incoming unwrapped line. \p First specifies whether it
+// is the first token in a given unwrapped line.
+void MacroCallReconstructor::add(FormatToken *Token,
+                                 FormatToken *ExpandedParent, bool First) {
+  LLVM_DEBUG(
+      llvm::dbgs() << "MCR: Token: " << Token->TokenText << ", Parent: "
+                   << (ExpandedParent ? ExpandedParent->TokenText : "<null>")
+                   << ", First: " << First << "\n");
+  // In order to be able to find the correct parent in the reconstructed token
+  // stream, we need to continue the last open reconstruction until we find the
+  // given token if it is part of the reconstructed token stream.
+  //
+  // Note that hidden tokens can be part of the reconstructed stream in nested
+  // macro calls.
+  // For example, given
+  //   #define C(x, y) x y
+  //   #define B(x) {x}
+  // And the call:
+  //   C(a, B(b))
+  // The outer macro call will be C(a, {b}), and the hidden token '}' can be
+  // found in the reconstructed token stream of that expansion level.
+  // In the expanded token stream
+  //   a {b}
+  // 'b' is a child of '{'. We need to continue the open expansion of the ','
+  // in the call of 'C' in order to correctly set the ',' as the parent of '{',
+  // so we later set the spelled token 'b' as a child of the ','.
+  if (!ActiveExpansions.empty() && Token->MacroCtx &&
+      (Token->MacroCtx->Role != MR_Hidden ||
+       ActiveExpansions.size() != Token->MacroCtx->ExpandedFrom.size())) {
+    if (bool PassedMacroComma = reconstructActiveCallUntil(Token))
+      First = true; // Tokens after a macro call comma start a new line.
+  }
+
+  prepareParent(ExpandedParent, First);
+
+  if (Token->MacroCtx) {
+    // If this token was generated by a macro call, add the reconstructed
+    // equivalent of the token.
+    reconstruct(Token);
+  } else {
+    // Otherwise, we add it to the current line.
+    appendToken(Token);
+  }
+}
+
+// Adjusts the stack of active reconstructed lines so we're ready to push
+// tokens. The tokens to be pushed are children of ExpandedParent in the
+// expanded code.
+//
+// This may entail:
+// - creating a new line, if the parent is on the active line
+// - popping active lines, if the parent is further up the stack
+//
+// Postcondition:
+// ActiveReconstructedLines.back() is the line that has \p ExpandedParent or its
+// reconstructed replacement token as a parent (when possible) - that is, the
+// last token in \c ActiveReconstructedLines[ActiveReconstructedLines.size()-2]
+// is the parent of ActiveReconstructedLines.back() in the reconstructed
+// unwrapped line.
+void MacroCallReconstructor::prepareParent(FormatToken *ExpandedParent,
+                                           bool NewLine) {
+  LLVM_DEBUG({
+    llvm::dbgs() << "ParentMap:\n";
+    debugParentMap();
+  });
+  // We want to find the parent in the new unwrapped line, where the expanded
+  // parent might have been replaced during reconstruction.
+  FormatToken *Parent = getParentInResult(ExpandedParent);
+  LLVM_DEBUG(llvm::dbgs() << "MCR: New parent: "
+                          << (Parent ? Parent->TokenText : "<null>") << "\n");
+
+  FormatToken *OpenMacroParent = nullptr;
+  if (!MacroCallStructure.empty()) {
+    // Inside a macro expansion, it is possible to lose track of the correct
+    // parent - either because it is already popped, for example because it was
+    // in a different macro argument (e.g. M({, })), or when we work on invalid
+    // code.
+    // Thus, we use the innermost macro call's parent as the parent at which
+    // we stop; this allows us to stay within the macro expansion and keeps
+    // any problems confined to the extent of the macro call.
+    OpenMacroParent =
+        getParentInResult(MacroCallStructure.back().MacroCallLParen);
+    LLVM_DEBUG(llvm::dbgs()
+               << "MacroCallLParen: "
+               << MacroCallStructure.back().MacroCallLParen->TokenText
+               << ", OpenMacroParent: "
+               << (OpenMacroParent ? OpenMacroParent->TokenText : "<null>")
+               << "\n");
+  }
+  if (NewLine ||
+      (!ActiveReconstructedLines.back()->Tokens.empty() &&
+       Parent == ActiveReconstructedLines.back()->Tokens.back()->Tok)) {
+    // If we are at the first token in a new line, we want to also
+    // create a new line in the resulting reconstructed unwrapped line.
+    while (ActiveReconstructedLines.back()->Tokens.empty() ||
+           (Parent != ActiveReconstructedLines.back()->Tokens.back()->Tok &&
+            ActiveReconstructedLines.back()->Tokens.back()->Tok !=
+                OpenMacroParent)) {
+      ActiveReconstructedLines.pop_back();
+      assert(!ActiveReconstructedLines.empty());
+    }
+    assert(!ActiveReconstructedLines.empty());
+    ActiveReconstructedLines.back()->Tokens.back()->Children.push_back(
+        std::make_unique<Line>());
+    ActiveReconstructedLines.push_back(
+        &*ActiveReconstructedLines.back()->Tokens.back()->Children.back());
+  } else if (parentLine().Tokens.back()->Tok != Parent) {
+    // If we're not the first token in a new line, pop lines until we find
+    // the child of \c Parent in the stack.
+    while (Parent != parentLine().Tokens.back()->Tok &&
+           parentLine().Tokens.back()->Tok &&
+           parentLine().Tokens.back()->Tok != OpenMacroParent) {
+      ActiveReconstructedLines.pop_back();
+      assert(!ActiveReconstructedLines.empty());
+    }
+  }
+  assert(!ActiveReconstructedLines.empty());
+}
+
+// Translates \p Parent, a parent token from the incoming expanded token
+// stream, into the token that serves as its parent in the output. Tokens
+// without a recorded replacement are returned unchanged.
+FormatToken *MacroCallReconstructor::getParentInResult(FormatToken *Parent) {
+  FormatToken *const Expanded = Parent;
+  // Follow the chain of recorded replacements to its end.
+  while (FormatToken *Next = SpelledParentToReconstructedParent.lookup(Parent))
+    Parent = Next;
+  // If we use a different token than the parent in the expanded token stream
+  // as parent, mark it as a special parent, so the formatting code knows it
+  // needs to have its children formatted.
+  if (Parent != Expanded)
+    Parent->MacroParent = true;
+  return Parent;
+}
+
+// Reconstruct a \p Token that was expanded from a macro call.
+void MacroCallReconstructor::reconstruct(FormatToken *Token) {
+  assert(Token->MacroCtx);
+  // A single token can be the only result of a macro call:
+  // Given: #define ID(x, y) ;
+  // And the call: ID(<some>, <tokens>)
+  // ';' in the expanded stream will reconstruct all of ID(<some>, <tokens>).
+  if (Token->MacroCtx->StartOfExpansion) {
+    startReconstruction(Token);
+    // If the order of tokens in the expanded token stream is not the
+    // same as the order of tokens in the reconstructed stream, we need
+    // to reconstruct tokens that arrive later in the stream.
+    if (Token->MacroCtx->Role != MR_Hidden) {
+      reconstructActiveCallUntil(Token);
+    }
+  }
+  assert(!ActiveExpansions.empty());
+  if (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE) {
+    assert(ActiveExpansions.size() == Token->MacroCtx->ExpandedFrom.size());
+    if (Token->MacroCtx->Role != MR_Hidden) {
+      // The current token in the reconstructed token stream must be the token
+      // we're looking for - we either arrive here after startReconstruction,
+      // which initiates the stream to the first token, or after
+      // reconstructActiveCallUntil skipped until the expected token in the
+      // reconstructed stream at the start of add(...).
+      assert(ActiveExpansions.back().SpelledI->Tok == Token);
+      processNextReconstructed();
+    } else if (!currentLine()->Tokens.empty()) {
+      // Map all hidden tokens to the last visible token in the output.
+      // If the hidden token is a parent, we'll use the last visible
+      // token as the parent of the hidden token's children.
+      SpelledParentToReconstructedParent[Token] =
+          currentLine()->Tokens.back()->Tok;
+    } else {
+      for (auto I = ActiveReconstructedLines.rbegin(),
+                E = ActiveReconstructedLines.rend();
+           I != E; ++I) {
+        if (!(*I)->Tokens.empty()) {
+          SpelledParentToReconstructedParent[Token] = (*I)->Tokens.back()->Tok;
+          break;
+        }
+      }
+    }
+  }
+  if (Token->MacroCtx->EndOfExpansion)
+    endReconstruction(Token);
+}
+
+// Given a \p Token that starts an expansion, reconstruct the beginning of the
+// macro call.
+// For example, given: #define ID(x) x
+// And the call: ID(int a)
+// Reconstructs: ID(
+void MacroCallReconstructor::startReconstruction(FormatToken *Token) {
+  assert(Token->MacroCtx);
+  assert(!Token->MacroCtx->ExpandedFrom.empty());
+  assert(ActiveExpansions.size() <= Token->MacroCtx->ExpandedFrom.size());
+#ifndef NDEBUG
+  // Check that the token's reconstruction stack matches our current
+  // reconstruction stack.
+  for (size_t I = 0; I < ActiveExpansions.size(); ++I) {
+    assert(ActiveExpansions[I].ID ==
+           Token->MacroCtx
+               ->ExpandedFrom[Token->MacroCtx->ExpandedFrom.size() - 1 - I]);
+  }
+#endif
+  // Start reconstruction for all calls for which this token is the first token
+  // generated by the call.
+  // Note that the token's expanded from stack is inside-to-outside, and the
+  // expansions for which this token is not the first are the outermost ones.
+  ArrayRef<FormatToken *> StartedMacros =
+      makeArrayRef(Token->MacroCtx->ExpandedFrom)
+          .drop_back(ActiveExpansions.size());
+  assert(StartedMacros.size() == Token->MacroCtx->StartOfExpansion);
+  // We reconstruct macro calls outside-to-inside.
+  for (FormatToken *ID : llvm::reverse(StartedMacros)) {
+    // We found a macro call to be reconstructed; the next time our
+    // reconstruction stack is empty we know we finished a reconstruction.
+#ifndef NDEBUG
+    State = InProgress;
+#endif
+    // Put the reconstructed macro call's token into our reconstruction stack.
+    auto IU = IdToReconstructed.find(ID);
+    assert(IU != IdToReconstructed.end());
+    ActiveExpansions.push_back(
+        {ID, IU->second->Tokens.begin(), IU->second->Tokens.end()});
+    // Process the macro call's identifier.
+    processNextReconstructed();
+    if (ActiveExpansions.back().SpelledI == ActiveExpansions.back().SpelledE)
+      continue;
+    if (ActiveExpansions.back().SpelledI->Tok->is(tok::l_paren)) {
+      // Process the optional opening parenthesis.
+      processNextReconstructed();
+    }
+  }
+}
+
+// Emits tokens of the innermost active reconstruction until \p Token is next
+// or the stream is exhausted; returns whether a macro call comma was passed.
+bool MacroCallReconstructor::reconstructActiveCallUntil(FormatToken *Token) {
+  assert(!ActiveExpansions.empty());
+  // FIXME: If Token was already expanded earlier, due to a change in order, we
+  // will not find it, but need to skip it.
+  bool SawComma = false;
+  for (;;) {
+    const auto &Active = ActiveExpansions.back();
+    if (Active.SpelledI == Active.SpelledE || Active.SpelledI->Tok == Token)
+      return SawComma;
+    SawComma |= processNextReconstructed();
+  }
+}
+
+// End all reconstructions for which \p Token is the final token.
+void MacroCallReconstructor::endReconstruction(FormatToken *Token) {
+  assert(Token->MacroCtx &&
+         (ActiveExpansions.size() >= Token->MacroCtx->EndOfExpansion));
+  for (size_t I = 0; I < Token->MacroCtx->EndOfExpansion; ++I) {
+#ifndef NDEBUG
+    // Check all remaining tokens but the final closing parenthesis and optional
+    // trailing comment were already reconstructed at an inner expansion level.
+    for (auto T = ActiveExpansions.back().SpelledI;
+         T != ActiveExpansions.back().SpelledE; ++T) {
+      FormatToken *Spelled = T->Tok; // Distinct name: do not shadow \p Token.
+      bool ClosingParen = (std::next(T) == ActiveExpansions.back().SpelledE ||
+                           std::next(T)->Tok->isTrailingComment()) &&
+                          !Spelled->MacroCtx && Spelled->is(tok::r_paren);
+      bool TrailingComment = Spelled->isTrailingComment();
+      bool PreviousLevel =
+          Spelled->MacroCtx &&
+          (ActiveExpansions.size() < Spelled->MacroCtx->ExpandedFrom.size());
+      // Purely diagnostic, so only emit it when debug output is requested.
+      if (!ClosingParen && !TrailingComment && !PreviousLevel)
+        LLVM_DEBUG(llvm::dbgs() << "At token: " << Spelled->TokenText << "\n");
+      // In addition to the following cases, we can also run into this
+      // when a macro call had more arguments than expected; in that case,
+      // the comma and the remaining tokens in the macro call will potentially
+      // end up in the line when we finish the expansion.
+      // FIXME: Add the information which arguments are unused, and assert
+      // one of the cases below plus reconstructed macro argument tokens.
+      // assert(ClosingParen || TrailingComment || PreviousLevel);
+    }
+#endif
+    // Handle the remaining open tokens:
+    // - expand the closing parenthesis, if it exists, including an optional
+    //   trailing comment
+    // - handle tokens that were already reconstructed at an inner expansion
+    //   level
+    // - handle tokens when a macro call had more than the expected number of
+    //   arguments, i.e. when #define M(x) is called as M(a, b, c) we'll end
+    //   up with the sequence ", b, c)" being open at the end of the
+    //   reconstruction; we want to gracefully handle that case
+    //
+    // FIXME: See the above debug-check for what we will need to do to be
+    // able to assert this.
+    // processNextReconstructed() advances SpelledI itself, so drain the
+    // stream by testing SpelledI directly.
+    while (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE)
+      processNextReconstructed();
+    ActiveExpansions.pop_back();
+  }
+}
+
+void MacroCallReconstructor::debugParentMap() const {
+  llvm::DenseSet<FormatToken *> Values; // Every token that is mapped to.
+  for (const auto &P : SpelledParentToReconstructedParent)
+    Values.insert(P.second);
+
+  for (const auto &P : SpelledParentToReconstructedParent) {
+    if (Values.contains(P.first))
+      continue; // Skip keys that are themselves a mapping target.
+    llvm::dbgs() << (P.first ? P.first->TokenText : "<null>");
+    for (auto I = SpelledParentToReconstructedParent.find(P.first),
+              E = SpelledParentToReconstructedParent.end();
+         I != E; I = SpelledParentToReconstructedParent.find(I->second)) {
+      llvm::dbgs() << " -> " << (I->second ? I->second->TokenText : "<null>");
+    }
+    llvm::dbgs() << "\n";
+  }
+}
+
+// If visible, add the next token of the reconstructed token sequence to the
+// output. Returns whether reconstruction passed a comma that is part of a
+// macro call.
+bool MacroCallReconstructor::processNextReconstructed() {
+  FormatToken *Token = ActiveExpansions.back().SpelledI->Tok;
+  ++ActiveExpansions.back().SpelledI;
+  if (Token->MacroCtx) {
+    // Skip tokens that are not part of the macro call.
+    if (Token->MacroCtx->Role == MR_Hidden) {
+      return false;
+    }
+    // Skip tokens we already expanded during an inner reconstruction.
+    // For example, given: #define ID(x) {x}
+    // And the call: ID(ID(f))
+    // We get two reconstructions:
+    // ID(f) -> {f}
+    // ID({f}) -> {{f}}
+    // We reconstruct f during the first reconstruction, and skip it during the
+    // second reconstruction.
+    if (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size()) {
+      return false;
+    }
+  }
+  // Tokens that do not have a macro context are tokens that are part of the
+  // macro call that have not taken part in expansion.
+  if (!Token->MacroCtx) {
+    // Put the parentheses and commas of a macro call into the same line;
+    // if the arguments produce new unwrapped lines, they will become children
+    // of the corresponding opening parenthesis or comma tokens in the
+    // reconstructed call.
+    if (Token->is(tok::l_paren)) {
+      MacroCallStructure.push_back(MacroCallState(
+          currentLine(), parentLine().Tokens.back()->Tok, Token));
+      // All tokens that are children of the previous line's last token in the
+      // reconstructed token stream will now be children of the l_paren token.
+      // For example, for the line containing the macro calls:
+      //   auto x = ID({ID(2)});
+      // We will build up a map <null> -> ( -> ( with the first and second
+      // l_paren of the macro call respectively. New lines that come in with a
+      // <null> parent will then become children of the l_paren token of the
+      // currently innermost macro call.
+      SpelledParentToReconstructedParent[MacroCallStructure.back()
+                                             .ParentLastToken] = Token;
+      appendToken(Token);
+      prepareParent(Token, /*NewLine=*/true);
+      Token->MacroParent = true;
+      return false;
+    }
+    if (!MacroCallStructure.empty()) {
+      if (Token->is(tok::comma)) {
+        // Make new lines inside the next argument children of the comma token.
+        SpelledParentToReconstructedParent
+            [MacroCallStructure.back().Line->Tokens.back()->Tok] = Token;
+        Token->MacroParent = true;
+        appendToken(Token, MacroCallStructure.back().Line);
+        prepareParent(Token, /*NewLine=*/true);
+        return true;
+      }
+      if (Token->is(tok::r_paren)) {
+        appendToken(Token, MacroCallStructure.back().Line);
+        SpelledParentToReconstructedParent.erase(
+            MacroCallStructure.back().ParentLastToken);
+        MacroCallStructure.pop_back();
+        return false;
+      }
+    }
+  }
+  // Note that any tokens that are tagged with MR_None have been passed as
+  // arguments to the macro that have not been expanded, for example:
+  // Given: #define ID(x) x
+  // When calling: ID(a, b)
+  // 'b' will be part of the reconstructed token stream, but tagged MR_None.
+  // Given that erroring out in this case would be disruptive, we continue
+  // pushing the (unformatted) token.
+  // FIXME: This can lead to unfortunate formatting decisions - give the user
+  // a hint that their macro definition is broken.
+  appendToken(Token);
+  return false;
+}
+
+void MacroCallReconstructor::finalize() {
+#ifndef NDEBUG
+  assert(State != Finalized && finished());
+  State = Finalized;
+#endif
+
+  // We created corresponding unwrapped lines for each incoming line as children
+  // of the toplevel null token.
+  assert(Result.Tokens.size() == 1 && !Result.Tokens.front()->Children.empty());
+  LLVM_DEBUG({
+    llvm::dbgs() << "Finalizing reconstructed lines:\n";
+    debug(Result, 0);
+  });
+
+  // The first line becomes the top level line in the resulting unwrapped line.
+  LineNode &Top = *Result.Tokens.front();
+  auto *I = Top.Children.begin();
+  // Every subsequent line will become a child of the last token in the previous
+  // line, which is the token prior to the first token in the line.
+  LineNode *Last = (*I)->Tokens.back().get();
+  ++I;
+  for (auto *E = Top.Children.end(); I != E; ++I) {
+    assert(Last->Children.empty());
+    Last->Children.push_back(std::move(*I));
+
+    // Mark the previous line's last token as generated by a macro expansion
+    // so the formatting algorithm can take that into account.
+    Last->Tok->MacroParent = true;
+
+    Last = Last->Children.back()->Tokens.back().get();
+  }
+  Top.Children.resize(1);
+}
+
+void MacroCallReconstructor::appendToken(FormatToken *Token, Line *L) {
+  LLVM_DEBUG(llvm::dbgs() << "-> " << Token->TokenText << "\n");
+  Line *Target = L ? L : currentLine(); // No target given: current line.
+  Target->Tokens.push_back(std::make_unique<LineNode>(Token));
+}
+
+UnwrappedLine MacroCallReconstructor::createUnwrappedLine(const Line &Line,
+                                                          int Level) {
+  UnwrappedLine Converted;
+  Converted.Level = Level;
+  for (const auto &Node : Line.Tokens) {
+    Converted.Tokens.push_back(Node->Tok);
+    UnwrappedLineNode &Added = Converted.Tokens.back();
+    for (const auto &ChildLine : Node->Children) {
+      if (!ChildLine->Tokens.empty())
+        Added.Children.push_back(createUnwrappedLine(*ChildLine, Level + 1));
+    }
+    // A single child line below '(' or ',' is inlined into this line.
+    if (Added.Children.size() == 1 &&
+        Added.Tok->isOneOf(tok::l_paren, tok::comma)) {
+      Converted.Tokens.splice(Converted.Tokens.end(),
+                              Added.Children.front().Tokens);
+      Added.Children.clear();
+    }
+  }
+  return Converted;
+}
+
+void MacroCallReconstructor::debug(const Line &Line, int Level) {
+  for (int Pad = Level; Pad > 0; --Pad)
+    llvm::dbgs() << " ";
+  for (const auto &Node : Line.Tokens) {
+    if (!Node)
+      continue;
+    if (FormatToken *Tok = Node->Tok)
+      llvm::dbgs() << Tok->TokenText << " ";
+    for (const auto &ChildLine : Node->Children) {
+      llvm::dbgs() << "\n"; // Each child line starts on a fresh output line.
+      debug(*ChildLine, Level + 1);
+      for (int Pad = Level; Pad > 0; --Pad)
+        llvm::dbgs() << " ";
+    }
+  }
+  llvm::dbgs() << "\n";
+}
+
+MacroCallReconstructor::Line &MacroCallReconstructor::parentLine() {
+  return **std::prev(ActiveReconstructedLines.end(), 2); // Second from top.
+}
+
+MacroCallReconstructor::Line *MacroCallReconstructor::currentLine() {
+  return ActiveReconstructedLines.back(); // Top of the active line stack.
+}
+
+MacroCallReconstructor::MacroCallState::MacroCallState(
+    MacroCallReconstructor::Line *Line, FormatToken *ParentLastToken,
+    FormatToken *MacroCallLParen)
+    : Line(Line), ParentLastToken(ParentLastToken),
+      MacroCallLParen(MacroCallLParen) {
+  LLVM_DEBUG(
+      llvm::dbgs() << "ParentLastToken: "
+                   << (ParentLastToken ? ParentLastToken->TokenText : "<null>")
+                   << "\n");
+
+  assert(MacroCallLParen->is(tok::l_paren)); // The call is anchored at '('.
+}
+
+} // namespace format
+} // namespace clang

diff --git a/clang/lib/Format/Macros.h b/clang/lib/Format/Macros.h
index da03beb091454..59774647a5694 100644
--- a/clang/lib/Format/Macros.h
+++ b/clang/lib/Format/Macros.h
@@ -1,4 +1,4 @@
-//===--- MacroExpander.h - Format C++ code ----------------------*- C++ -*-===//
+//===--- Macros.h - Format C++ code -----------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -22,40 +22,38 @@
 /// spelled token streams into expanded token streams when it encounters a
 /// macro call. The UnwrappedLineParser continues to parse UnwrappedLines
 /// from the expanded token stream.
-/// After the expanded unwrapped lines are parsed, the MacroUnexpander matches
-/// the spelled token stream into unwrapped lines that best resemble the
-/// structure of the expanded unwrapped lines.
+/// After the expanded unwrapped lines are parsed, the MacroCallReconstructor
+/// matches the spelled token stream into unwrapped lines that best resemble the
+/// structure of the expanded unwrapped lines. These reconstructed unwrapped
+/// lines are aliasing the tokens in the expanded token stream, so that token
+/// annotations will be reused when formatting the spelled macro calls.
 ///
-/// When formatting, clang-format formats the expanded unwrapped lines first,
-/// determining the token types. Next, it formats the spelled unwrapped lines,
-/// keeping the token types fixed, while allowing other formatting decisions
-/// to change.
+/// When formatting, clang-format annotates and formats the expanded unwrapped
+/// lines first, determining the token types. Next, it formats the spelled
+/// unwrapped lines, keeping the token types fixed, while allowing other
+/// formatting decisions to change.
 ///
 //===----------------------------------------------------------------------===//
 
 #ifndef CLANG_LIB_FORMAT_MACROS_H
 #define CLANG_LIB_FORMAT_MACROS_H
 
+#include <list>
+#include <map>
 #include <string>
-#include <unordered_map>
 #include <vector>
 
-#include "Encoding.h"
 #include "FormatToken.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 
-namespace llvm {
-class MemoryBuffer;
-} // namespace llvm
-
 namespace clang {
-class IdentifierTable;
-class SourceManager;
-
 namespace format {
-struct FormatStyle;
+
+struct UnwrappedLine;
+struct UnwrappedLineNode;
 
 /// Takes a set of macro definitions as strings and allows expanding calls to
 /// those macros.
@@ -134,6 +132,249 @@ class MacroExpander {
   llvm::StringMap<Definition> Definitions;
 };
 
+/// Converts a sequence of UnwrappedLines containing expanded macros into a
+/// single UnwrappedLine containing the macro calls.  This UnwrappedLine may be
+/// broken into child lines, in a way that best conveys the structure of the
+/// expanded code.
+///
+/// In the simplest case, a spelled UnwrappedLine contains one macro, and after
+/// expanding it we have one expanded UnwrappedLine.  In general, macro
+/// expansions can span UnwrappedLines, and multiple macros can contribute
+/// tokens to the same line.  We keep consuming expanded lines until:
+/// *   all expansions that started have finished (we're not chopping any macros
+///     in half)
+/// *   *and* we've reached the end of a *spelled* unwrapped line.
+///
+/// A single UnwrappedLine represents this chunk of code.
+///
+/// After this point, the state of the spelled/expanded stream is "in sync"
+/// (both at the start of an UnwrappedLine, with no macros open), so the
+/// MacroCallReconstructor can be thrown away and parsing can continue.
+///
+/// Given a mapping from the macro name identifier token in the macro call
+/// to the tokens of the macro call, for example:
+/// CLASSA -> CLASSA({public: void x();})
+///
+/// When getting the formatted lines of the expansion via the \c addLine method
+/// (each '->' specifies a call to \c addLine ):
+/// -> class A {
+/// -> public:
+/// ->   void x();
+/// -> };
+///
+/// Creates the tree of unwrapped lines containing the macro call tokens so that
+/// the macro call tokens fit the semantic structure of the expanded formatted
+/// lines:
+/// -> CLASSA({
+/// -> public:
+/// ->   void x();
+/// -> })
+class MacroCallReconstructor {
+public:
+  /// Create a Reconstructor whose resulting \c UnwrappedLine will start at
+  /// \p Level, using the map from name identifier token to the corresponding
+  /// tokens of the spelled macro call.
+  MacroCallReconstructor(
+      unsigned Level,
+      const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
+          &ActiveExpansions);
+
+  /// For the given \p Line, match all occurrences of tokens expanded from a
+  /// macro to unwrapped lines in the spelled macro call so that the resulting
+  /// tree of unwrapped lines best resembles the structure of unwrapped lines
+  /// passed in via \c addLine.
+  void addLine(const UnwrappedLine &Line);
+
+  /// Check whether at the current state there is no open macro expansion
+  /// that needs to be processed to finish a macro call.
+  /// Only when \c finished() is true, \c takeResult() can be called to retrieve
+  /// the resulting \c UnwrappedLine.
+  /// If there are multiple subsequent macro calls within an unwrapped line in
+  /// the spelled token stream, the calling code may also continue to call
+  /// \c addLine() when \c finished() is true.
+  bool finished() const { return ActiveExpansions.empty(); }
+
+  /// Retrieve the formatted \c UnwrappedLine containing the original
+  /// macro calls, formatted according to the expanded token stream received
+  /// via \c addLine().
+  /// Generally, this line tries to have the same structure as the expanded,
+  /// formatted unwrapped lines handed in via \c addLine(), with the exception
+  /// that for multiple top-level lines, each subsequent line will be the
+  /// child of the last token in its predecessor. This representation is chosen
+  /// because it is a precondition to the formatter that we get what looks like
+  /// a single statement in a single \c UnwrappedLine (i.e. matching parens).
+  ///
+  /// If a token in a macro argument is a child of a token in the expansion,
+  /// the parent will be the corresponding token in the macro call.
+  /// For example:
+  ///   #define C(a, b) class C { a b
+  ///   C(int x;, int y;)
+  /// would expand to
+  ///   class C { int x; int y;
+  /// where in a formatted line "int x;" and "int y;" would both be new separate
+  /// lines.
+  ///
+  /// In the result, "int x;" will be a child of the opening parenthesis in "C("
+  /// and "int y;" will be a child of the "," token:
+  ///   C (
+  ///     \- int x;
+  ///     ,
+  ///     \- int y;
+  ///     )
+  UnwrappedLine takeResult() &&;
+
+private:
+  void add(FormatToken *Token, FormatToken *ExpandedParent, bool First);
+  void prepareParent(FormatToken *ExpandedParent, bool First);
+  FormatToken *getParentInResult(FormatToken *Parent);
+  void reconstruct(FormatToken *Token);
+  void startReconstruction(FormatToken *Token);
+  bool reconstructActiveCallUntil(FormatToken *Token);
+  void endReconstruction(FormatToken *Token);
+  bool processNextReconstructed();
+  void finalize();
+
+  struct Line;
+
+  void appendToken(FormatToken *Token, Line *L = nullptr);
+  UnwrappedLine createUnwrappedLine(const Line &Line, int Level);
+  void debug(const Line &Line, int Level);
+  Line &parentLine();
+  Line *currentLine();
+  void debugParentMap() const;
+
+#ifndef NDEBUG
+  enum ReconstructorState {
+    Start,      // No macro expansion was found in the input yet.
+    InProgress, // During a macro reconstruction.
+    Finalized,  // Past macro reconstruction, the result is finalized.
+  };
+  ReconstructorState State = Start;
+#endif
+
+  // Node in which we build up the resulting unwrapped line; this type is
+  // analogous to UnwrappedLineNode.
+  struct LineNode {
+    LineNode() = default;
+    LineNode(FormatToken *Tok) : Tok(Tok) {}
+    FormatToken *Tok = nullptr;
+    llvm::SmallVector<std::unique_ptr<Line>> Children;
+  };
+
+  // Line in which we build up the resulting unwrapped line.
+  // FIXME: Investigate changing UnwrappedLine to a pointer type and using it
+  // instead of rolling our own type.
+  struct Line {
+    llvm::SmallVector<std::unique_ptr<LineNode>> Tokens;
+  };
+
+  // The line in which we collect the resulting reconstructed output.
+  // To reduce special cases in the algorithm, the first level of the line
+  // contains a single null token that has the reconstructed incoming
+  // lines as children.
+  // In the end, we stitch the lines together so that each subsequent line
+  // is a child of the last token of the previous line. This is necessary
+  // in order to format the overall expression as a single logical line -
+  // if we created separate lines, we'd format them with their own top-level
+  // indent depending on the semantic structure, which is not desired.
+  Line Result;
+
+  // Stack of currently "open" lines, where each line's predecessor's last
+  // token is the parent token for that line.
+  llvm::SmallVector<Line *> ActiveReconstructedLines;
+
+  // Maps from the expanded token to the token that takes its place in the
+  // reconstructed token stream in terms of parent-child relationships.
+  // Note that it might take multiple steps to arrive at the correct
+  // parent in the output.
+  // Given: #define C(a, b) []() { a; b; }
+  // And a call: C(f(), g())
+  // The structure in the incoming formatted unwrapped line will be:
+  // []() {
+  //      |- f();
+  //      \- g();
+  // }
+  // with f and g being children of the opening brace.
+  // In the reconstructed call:
+  // C(f(), g())
+  //  \- f()
+  //      \- g()
+  // We want f to be a child of the opening parenthesis and g to be a child
+  // of the comma token in the macro call.
+  // Thus, we map
+  // { -> (
+  // and add
+  // ( -> ,
+  // once we're past the comma in the reconstruction.
+  llvm::DenseMap<FormatToken *, FormatToken *>
+      SpelledParentToReconstructedParent;
+
+  // Keeps track of a single expansion while we're reconstructing tokens it
+  // generated.
+  struct Expansion {
+    // The identifier token of the macro call.
+    FormatToken *ID;
+    // Our current position in the reconstruction.
+    std::list<UnwrappedLineNode>::iterator SpelledI;
+    // The end of the reconstructed token sequence.
+    std::list<UnwrappedLineNode>::iterator SpelledE;
+  };
+
+  // Stack of macro calls for which we're in the middle of an expansion.
+  llvm::SmallVector<Expansion> ActiveExpansions;
+
+  struct MacroCallState {
+    MacroCallState(Line *Line, FormatToken *ParentLastToken,
+                   FormatToken *MacroCallLParen);
+
+    Line *Line;
+
+    // The last token in the parent line or expansion, or nullptr if the macro
+    // expansion is on a top-level line.
+    //
+    // For example, in the macro call:
+    //   auto f = []() { ID(1); };
+    // The MacroCallState for ID will have '{' as ParentLastToken.
+    //
+    // In the macro call:
+    //   ID(ID(void f()));
+    // The MacroCallState of the outer ID will have nullptr as ParentLastToken,
+    // while the MacroCallState for the inner ID will have the '(' of the outer
+    // ID as ParentLastToken.
+    //
+    // In the macro call:
+    //   ID2(a, ID(b));
+    // The MacroCallState of ID will have ',' as ParentLastToken.
+    FormatToken *ParentLastToken;
+
+    // The l_paren of this MacroCallState's macro call.
+    FormatToken *MacroCallLParen;
+  };
+
+  // Keeps track of the lines into which the opening brace/parenthesis &
+  // argument separating commas for each level in the macro call go in order to
+  // put the corresponding closing brace/parenthesis into the same line in the
+  // output and keep track of which parents in the expanded token stream map to
+  // which tokens in the reconstructed stream.
+  // When an opening brace/parenthesis has children, we want the structure of
+  // the output line to be:
+  // |- MACRO
+  // |- (
+  // |  \- <argument>
+  // |- ,
+  // |  \- <argument>
+  // \- )
+  llvm::SmallVector<MacroCallState> MacroCallStructure;
+
+  // Level the generated UnwrappedLine will be at.
+  const unsigned Level;
+
+  // Maps from identifier of the macro call to an unwrapped line containing
+  // all tokens of the macro call.
+  const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
+      &IdToReconstructed;
+};
+
 } // namespace format
 } // namespace clang
 

diff  --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h
index 8f63870412d00..3394bfab8b8e8 100644
--- a/clang/lib/Format/UnwrappedLineParser.h
+++ b/clang/lib/Format/UnwrappedLineParser.h
@@ -20,6 +20,7 @@
 #include "clang/Format/Format.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/Support/Regex.h"
+#include <list>
 #include <stack>
 #include <vector>
 
@@ -38,7 +39,7 @@ struct UnwrappedLine {
   UnwrappedLine();
 
   /// The \c Tokens comprising this \c UnwrappedLine.
-  std::vector<UnwrappedLineNode> Tokens;
+  std::list<UnwrappedLineNode> Tokens;
 
   /// The indent level of the \c UnwrappedLine.
   unsigned Level;

diff  --git a/clang/unittests/Format/CMakeLists.txt b/clang/unittests/Format/CMakeLists.txt
index 9cc6c7a96af59..715aec4ed5853 100644
--- a/clang/unittests/Format/CMakeLists.txt
+++ b/clang/unittests/Format/CMakeLists.txt
@@ -18,6 +18,7 @@ add_clang_unittest(FormatTests
   FormatTestTableGen.cpp
   FormatTestTextProto.cpp
   FormatTestVerilog.cpp
+  MacroCallReconstructorTest.cpp
   MacroExpanderTest.cpp
   NamespaceEndCommentsFixerTest.cpp
   QualifierFixerTest.cpp

diff  --git a/clang/unittests/Format/MacroCallReconstructorTest.cpp b/clang/unittests/Format/MacroCallReconstructorTest.cpp
new file mode 100644
index 0000000000000..2bda62aa42be1
--- /dev/null
+++ b/clang/unittests/Format/MacroCallReconstructorTest.cpp
@@ -0,0 +1,688 @@
+#include "../../lib/Format/Macros.h"
+#include "../../lib/Format/UnwrappedLineParser.h"
+#include "TestLexer.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <map>
+#include <memory>
+#include <vector>
+
+namespace clang {
+namespace format {
+namespace {
+
+using UnexpandedMap =
+    llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>;
+
+// Keeps track of a sequence of macro expansions.
+//
+// The expanded tokens are accessible via getTokens(), while a map of macro call
+// identifier token to unexpanded token stream is accessible via
+// getUnexpanded().
+class Expansion {
+public:
+  Expansion(TestLexer &Lex, MacroExpander &Macros) : Lex(Lex), Macros(Macros) {}
+
+  // Appends the token stream obtained from expanding the macro Name given
+  // the provided arguments, to be later retrieved with getTokens().
+  // Returns the list of tokens making up the unexpanded macro call.
+  TokenList
+  expand(llvm::StringRef Name,
+         const SmallVector<llvm::SmallVector<FormatToken *, 8>, 1> &Args) {
+    auto *ID = Lex.id(Name);
+    auto UnexpandedLine = std::make_unique<UnwrappedLine>();
+    UnexpandedLine->Tokens.push_back(ID);
+    if (!Args.empty()) {
+      UnexpandedLine->Tokens.push_back(Lex.id("("));
+      for (auto I = Args.begin(), E = Args.end(); I != E; ++I) {
+        if (I != Args.begin())
+          UnexpandedLine->Tokens.push_back(Lex.id(","));
+        UnexpandedLine->Tokens.insert(UnexpandedLine->Tokens.end(), I->begin(),
+                                      I->end());
+      }
+      UnexpandedLine->Tokens.push_back(Lex.id(")"));
+    }
+    Unexpanded[ID] = std::move(UnexpandedLine);
+
+    auto Expanded = uneof(Macros.expand(ID, Args));
+    Tokens.append(Expanded.begin(), Expanded.end());
+
+    TokenList UnexpandedTokens;
+    for (const UnwrappedLineNode &Node : Unexpanded[ID]->Tokens) {
+      UnexpandedTokens.push_back(Node.Tok);
+    }
+    return UnexpandedTokens;
+  }
+
+  TokenList expand(llvm::StringRef Name,
+                   const std::vector<std::string> &Args = {}) {
+    return expand(Name, lexArgs(Args));
+  }
+
+  const UnexpandedMap &getUnexpanded() const { return Unexpanded; }
+
+  const TokenList &getTokens() const { return Tokens; }
+
+private:
+  llvm::SmallVector<TokenList, 1>
+  lexArgs(const std::vector<std::string> &Args) {
+    llvm::SmallVector<TokenList, 1> Result;
+    for (const auto &Arg : Args) {
+      Result.push_back(uneof(Lex.lex(Arg)));
+    }
+    return Result;
+  }
+  llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>> Unexpanded;
+  llvm::SmallVector<FormatToken *, 8> Tokens;
+  TestLexer &Lex;
+  MacroExpander &Macros;
+};
+
+struct Chunk {
+  Chunk(llvm::ArrayRef<FormatToken *> Tokens)
+      : Tokens(Tokens.begin(), Tokens.end()) {}
+  Chunk(llvm::ArrayRef<UnwrappedLine> Children)
+      : Children(Children.begin(), Children.end()) {}
+  llvm::SmallVector<UnwrappedLineNode, 1> Tokens;
+  llvm::SmallVector<UnwrappedLine, 0> Children;
+};
+
+bool tokenMatches(const FormatToken *Left, const FormatToken *Right) {
+  if (Left->getType() == Right->getType() &&
+      Left->TokenText == Right->TokenText)
+    return true;
+  llvm::dbgs() << Left->TokenText << " != " << Right->TokenText << "\n";
+  return false;
+}
+
+// Allows producing chunks of a token list by typing the code of equal tokens.
+//
+// Created from a list of tokens, users call "consume" to get the next chunk
+// of tokens, checking that they match the written code.
+struct Matcher {
+  Matcher(const TokenList &Tokens, TestLexer &Lex)
+      : Tokens(Tokens), It(this->Tokens.begin()), Lex(Lex) {}
+
+  Chunk consume(StringRef Tokens) {
+    TokenList Result;
+    for (const FormatToken *Token : uneof(Lex.lex(Tokens))) {
+      assert(tokenMatches(*It, Token));
+      Result.push_back(*It);
+      ++It;
+    }
+    return Chunk(Result);
+  }
+
+  TokenList Tokens;
+  TokenList::iterator It;
+  TestLexer &Lex;
+};
+
+UnexpandedMap mergeUnexpanded(const UnexpandedMap &M1,
+                              const UnexpandedMap &M2) {
+  UnexpandedMap Result;
+  for (const auto &KV : M1) {
+    Result[KV.first] = std::make_unique<UnwrappedLine>(*KV.second);
+  }
+  for (const auto &KV : M2) {
+    Result[KV.first] = std::make_unique<UnwrappedLine>(*KV.second);
+  }
+  return Result;
+}
+
+class MacroCallReconstructorTest : public ::testing::Test {
+public:
+  MacroCallReconstructorTest() : Lex(Allocator, Buffers) {}
+
+  std::unique_ptr<MacroExpander>
+  createExpander(const std::vector<std::string> &MacroDefinitions) {
+    return std::make_unique<MacroExpander>(MacroDefinitions,
+                                           Lex.SourceMgr.get(), Lex.Style,
+                                           Lex.Allocator, Lex.IdentTable);
+  }
+
+  UnwrappedLine line(llvm::ArrayRef<FormatToken *> Tokens) {
+    UnwrappedLine Result;
+    for (FormatToken *Tok : Tokens) {
+      Result.Tokens.push_back(UnwrappedLineNode(Tok));
+    }
+    return Result;
+  }
+
+  UnwrappedLine line(llvm::StringRef Text) { return line({lex(Text)}); }
+
+  UnwrappedLine line(llvm::ArrayRef<Chunk> Chunks) {
+    UnwrappedLine Result;
+    for (const Chunk &Chunk : Chunks) {
+      Result.Tokens.insert(Result.Tokens.end(), Chunk.Tokens.begin(),
+                           Chunk.Tokens.end());
+      assert(!Result.Tokens.empty());
+      Result.Tokens.back().Children.append(Chunk.Children.begin(),
+                                           Chunk.Children.end());
+    }
+    return Result;
+  }
+
+  TokenList lex(llvm::StringRef Text) { return uneof(Lex.lex(Text)); }
+
+  Chunk tokens(llvm::StringRef Text) { return Chunk(lex(Text)); }
+
+  Chunk children(llvm::ArrayRef<UnwrappedLine> Children) {
+    return Chunk(Children);
+  }
+
+  llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
+  std::vector<std::unique_ptr<llvm::MemoryBuffer>> Buffers;
+  TestLexer Lex;
+};
+
+bool matchesTokens(const UnwrappedLine &L1, const UnwrappedLine &L2) {
+  if (L1.Tokens.size() != L2.Tokens.size())
+    return false;
+  for (auto L1It = L1.Tokens.begin(), L2It = L2.Tokens.begin();
+       L1It != L1.Tokens.end(); ++L1It, ++L2It) {
+    if (L1It->Tok != L2It->Tok)
+      return false;
+    if (L1It->Children.size() != L2It->Children.size())
+      return false;
+    for (auto L1ChildIt = L1It->Children.begin(),
+              L2ChildIt = L2It->Children.begin();
+         L1ChildIt != L1It->Children.end(); ++L1ChildIt, ++L2ChildIt) {
+      if (!matchesTokens(*L1ChildIt, *L2ChildIt))
+        return false;
+    }
+  }
+  return true;
+}
+MATCHER_P(matchesLine, line, "") { return matchesTokens(arg, line); }
+
+TEST_F(MacroCallReconstructorTest, Identifier) {
+  auto Macros = createExpander({"X=x"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("X");
+
+  MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
+  Unexp.addLine(line(Exp.getTokens()));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call, Lex);
+  EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(line(U.consume("X"))));
+}
+
+TEST_F(MacroCallReconstructorTest, NestedLineWithinCall) {
+  auto Macros = createExpander({"C(a)=class X { a; };"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("C", {"void f()"});
+
+  MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens(), Lex);
+  Unexp.addLine(line(E.consume("class X {")));
+  EXPECT_FALSE(Unexp.finished());
+  Unexp.addLine(line(E.consume("void f();")));
+  EXPECT_FALSE(Unexp.finished());
+  Unexp.addLine(line(E.consume("};")));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call, Lex);
+  EXPECT_THAT(std::move(Unexp).takeResult(),
+              matchesLine(line(U.consume("C(void f())"))));
+}
+
+TEST_F(MacroCallReconstructorTest, MultipleLinesInNestedMultiParamsExpansion) {
+  auto Macros = createExpander({"C(a, b)=a b", "B(a)={a}"});
+  Expansion Exp1(Lex, *Macros);
+  TokenList Call1 = Exp1.expand("B", {"b"});
+  Expansion Exp2(Lex, *Macros);
+  TokenList Call2 = Exp2.expand("C", {uneof(Lex.lex("a")), Exp1.getTokens()});
+
+  UnexpandedMap Unexpanded =
+      mergeUnexpanded(Exp1.getUnexpanded(), Exp2.getUnexpanded());
+  MacroCallReconstructor Unexp(0, Unexpanded);
+  Matcher E(Exp2.getTokens(), Lex);
+  Unexp.addLine(line(E.consume("a")));
+  EXPECT_FALSE(Unexp.finished());
+  Unexp.addLine(line(E.consume("{")));
+  EXPECT_FALSE(Unexp.finished());
+  Unexp.addLine(line(E.consume("b")));
+  EXPECT_FALSE(Unexp.finished());
+  Unexp.addLine(line(E.consume("}")));
+  EXPECT_TRUE(Unexp.finished());
+
+  Matcher U1(Call1, Lex);
+  auto Middle = U1.consume("B(b)");
+  Matcher U2(Call2, Lex);
+  auto Chunk1 = U2.consume("C(a, ");
+  auto Chunk2 = U2.consume("{ b }");
+  auto Chunk3 = U2.consume(")");
+
+  EXPECT_THAT(std::move(Unexp).takeResult(),
+              matchesLine(line({Chunk1, Middle, Chunk3})));
+}
+
+TEST_F(MacroCallReconstructorTest, StatementSequence) {
+  auto Macros = createExpander({"SEMI=;"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call1 = Exp.expand("SEMI");
+  TokenList Call2 = Exp.expand("SEMI");
+  TokenList Call3 = Exp.expand("SEMI");
+
+  MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens(), Lex);
+  Unexp.addLine(line(E.consume(";")));
+  EXPECT_TRUE(Unexp.finished());
+  Unexp.addLine(line(E.consume(";")));
+  EXPECT_TRUE(Unexp.finished());
+  Unexp.addLine(line(E.consume(";")));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U1(Call1, Lex);
+  Matcher U2(Call2, Lex);
+  Matcher U3(Call3, Lex);
+  EXPECT_THAT(std::move(Unexp).takeResult(),
+              matchesLine(line(
+                  {U1.consume("SEMI"),
+                   children({line({U2.consume("SEMI"),
+                                   children({line(U3.consume("SEMI"))})})})})));
+}
+
+TEST_F(MacroCallReconstructorTest, NestedBlock) {
+  auto Macros = createExpander({"ID(x)=x"});
+  // Test: ID({ ID(a *b); })
+  // 1. expand ID(a *b) -> a *b
+  Expansion Exp1(Lex, *Macros);
+  TokenList Call1 = Exp1.expand("ID", {"a *b"});
+  // 2. expand ID({ a *b; })
+  TokenList Arg;
+  Arg.push_back(Lex.id("{"));
+  Arg.append(Exp1.getTokens().begin(), Exp1.getTokens().end());
+  Arg.push_back(Lex.id(";"));
+  Arg.push_back(Lex.id("}"));
+  Expansion Exp2(Lex, *Macros);
+  TokenList Call2 = Exp2.expand("ID", {Arg});
+
+  // Consume as-if formatted:
+  // {
+  //   a *b;
+  // }
+  UnexpandedMap Unexpanded =
+      mergeUnexpanded(Exp1.getUnexpanded(), Exp2.getUnexpanded());
+  MacroCallReconstructor Unexp(0, Unexpanded);
+  Matcher E(Exp2.getTokens(), Lex);
+  Unexp.addLine(line(E.consume("{")));
+  EXPECT_FALSE(Unexp.finished());
+  Unexp.addLine(line(E.consume("a *b;")));
+  EXPECT_FALSE(Unexp.finished());
+  Unexp.addLine(line(E.consume("}")));
+  EXPECT_TRUE(Unexp.finished());
+
+  // Expect lines:
+  // ID({
+  //   ID(a *b);
+  // })
+  Matcher U1(Call1, Lex);
+  Matcher U2(Call2, Lex);
+  auto Chunk2Start = U2.consume("ID(");
+  auto Chunk2LBrace = U2.consume("{");
+  U2.consume("a *b");
+  auto Chunk2Mid = U2.consume(";");
+  auto Chunk2RBrace = U2.consume("}");
+  auto Chunk2End = U2.consume(")");
+  auto Chunk1 = U1.consume("ID(a *b)");
+
+  auto Expected = line({Chunk2Start,
+                        children({
+                            line(Chunk2LBrace),
+                            line({Chunk1, Chunk2Mid}),
+                            line(Chunk2RBrace),
+                        }),
+                        Chunk2End});
+  EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroCallReconstructorTest, NestedChildBlocks) {
+  auto Macros = createExpander({"ID(x)=x", "CALL(x)=f([] { x })"});
+  // Test: ID(CALL(CALL(return a * b;)))
+  // 1. expand CALL(return a * b;)
+  Expansion Exp1(Lex, *Macros);
+  TokenList Call1 = Exp1.expand("CALL", {"return a * b;"});
+  // 2. expand CALL(f([] { return a * b; }))
+  Expansion Exp2(Lex, *Macros);
+  TokenList Call2 = Exp2.expand("CALL", {Exp1.getTokens()});
+  // 3. expand ID({ f([] { f([] { return a * b; }) }) })
+  TokenList Arg3;
+  Arg3.push_back(Lex.id("{"));
+  Arg3.append(Exp2.getTokens().begin(), Exp2.getTokens().end());
+  Arg3.push_back(Lex.id("}"));
+  Expansion Exp3(Lex, *Macros);
+  TokenList Call3 = Exp3.expand("ID", {Arg3});
+
+  // Consume as-if formatted in three unwrapped lines:
+  // 0: {
+  // 1:   f([] {
+  //        f([] {
+  //          return a * b;
+  //        })
+  //      })
+  // 2: }
+  UnexpandedMap Unexpanded = mergeUnexpanded(
+      Exp1.getUnexpanded(),
+      mergeUnexpanded(Exp2.getUnexpanded(), Exp3.getUnexpanded()));
+  MacroCallReconstructor Unexp(0, Unexpanded);
+  Matcher E(Exp3.getTokens(), Lex);
+  Unexp.addLine(line(E.consume("{")));
+  Unexp.addLine(
+      line({E.consume("f([] {"),
+            children({line({E.consume("f([] {"),
+                            children({line(E.consume("return a * b;"))}),
+                            E.consume("})")})}),
+            E.consume("})")}));
+  Unexp.addLine(line(E.consume("}")));
+  EXPECT_TRUE(Unexp.finished());
+
+  // Expect lines:
+  // ID(
+  //   {
+  //   CALL(CALL(return a * b;))
+  //   }
+  // )
+  Matcher U1(Call1, Lex);
+  Matcher U2(Call2, Lex);
+  Matcher U3(Call3, Lex);
+  auto Chunk3Start = U3.consume("ID(");
+  auto Chunk3LBrace = U3.consume("{");
+  U3.consume("f([] { f([] { return a * b; }) })");
+  auto Chunk3RBrace = U3.consume("}");
+  auto Chunk3End = U3.consume(")");
+  auto Chunk2Start = U2.consume("CALL(");
+  U2.consume("f([] { return a * b; })");
+  auto Chunk2End = U2.consume(")");
+  auto Chunk1 = U1.consume("CALL(return a * b;)");
+
+  auto Expected = line({
+      Chunk3Start,
+      children({
+          line(Chunk3LBrace),
+          line({
+              Chunk2Start,
+              Chunk1,
+              Chunk2End,
+          }),
+          line(Chunk3RBrace),
+      }),
+      Chunk3End,
+  });
+  EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroCallReconstructorTest, NestedChildrenMultipleArguments) {
+  auto Macros = createExpander({"CALL(a, b)=f([] { a; b; })"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("CALL", {std::string("int a"), "int b"});
+
+  MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens(), Lex);
+  Unexp.addLine(line({
+      E.consume("f([] {"),
+      children({
+          line(E.consume("int a;")),
+          line(E.consume("int b;")),
+      }),
+      E.consume("})"),
+  }));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call, Lex);
+  auto Expected = line(U.consume("CALL(int a, int b)"));
+  EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroCallReconstructorTest, ReverseOrderArgumentsInExpansion) {
+  auto Macros = createExpander({"CALL(a, b)=b + a"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("CALL", {std::string("x"), "y"});
+
+  MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens(), Lex);
+  Unexp.addLine(line(E.consume("y + x")));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call, Lex);
+  auto Expected = line(U.consume("CALL(x, y)"));
+  EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroCallReconstructorTest, MultipleToplevelUnwrappedLines) {
+  auto Macros = createExpander({"ID(a, b)=a b"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("ID", {std::string("x; x"), "y"});
+
+  MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens(), Lex);
+  Unexp.addLine(line(E.consume("x;")));
+  Unexp.addLine(line(E.consume("x y")));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call, Lex);
+  auto Expected = line({
+      U.consume("ID("),
+      children({
+          line(U.consume("x;")),
+          line(U.consume("x")),
+      }),
+      U.consume(", y)"),
+  });
+  EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroCallReconstructorTest, NestedCallsMultipleLines) {
+  auto Macros = createExpander({"ID(x)=x"});
+  // Test: ID({ID(a * b);})
+  // 1. expand ID(a * b)
+  Expansion Exp1(Lex, *Macros);
+  TokenList Call1 = Exp1.expand("ID", {"a * b"});
+  // 2. expand ID({ a * b; })
+  Expansion Exp2(Lex, *Macros);
+  TokenList Arg2;
+  Arg2.push_back(Lex.id("{"));
+  Arg2.append(Exp1.getTokens().begin(), Exp1.getTokens().end());
+  Arg2.push_back(Lex.id(";"));
+  Arg2.push_back(Lex.id("}"));
+  TokenList Call2 = Exp2.expand("ID", {Arg2});
+
+  // Consume as-if formatted in three unwrapped lines:
+  // 0: {
+  // 1:   a * b;
+  // 2: }
+  UnexpandedMap Unexpanded =
+      mergeUnexpanded(Exp1.getUnexpanded(), Exp2.getUnexpanded());
+  MacroCallReconstructor Unexp(0, Unexpanded);
+  Matcher E(Exp2.getTokens(), Lex);
+  Unexp.addLine(line(E.consume("{")));
+  Unexp.addLine(line(E.consume("a * b;")));
+  Unexp.addLine(line(E.consume("}")));
+  EXPECT_TRUE(Unexp.finished());
+
+  // Expect lines:
+  // ID(
+  //     {
+  //     ID(a * b);
+  //     }
+  // )
+  Matcher U1(Call1, Lex);
+  Matcher U2(Call2, Lex);
+  auto Chunk2Start = U2.consume("ID(");
+  auto Chunk2LBrace = U2.consume("{");
+  U2.consume("a * b");
+  auto Chunk2Semi = U2.consume(";");
+  auto Chunk2RBrace = U2.consume("}");
+  auto Chunk2End = U2.consume(")");
+  auto Chunk1 = U1.consume("ID(a * b)");
+
+  auto Expected = line({
+      Chunk2Start,
+      children({
+          line({Chunk2LBrace}),
+          line({Chunk1, Chunk2Semi}),
+          line({Chunk2RBrace}),
+      }),
+      Chunk2End,
+  });
+  EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroCallReconstructorTest, ParentOutsideMacroCall) {
+  auto Macros = createExpander({"ID(a)=a"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("ID", {std::string("x; y; z;")});
+
+  auto Prefix = tokens("int a = []() {");
+  auto Postfix = tokens("}();");
+  MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens(), Lex);
+  Unexp.addLine(line({
+      Prefix,
+      children({
+          line(E.consume("x;")),
+          line(E.consume("y;")),
+          line(E.consume("z;")),
+      }),
+      Postfix,
+  }));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call, Lex);
+  auto Expected = line({
+      Prefix,
+      children({
+          line({
+              U.consume("ID("),
+              children({
+                  line(U.consume("x;")),
+                  line(U.consume("y;")),
+                  line(U.consume("z;")),
+              }),
+              U.consume(")"),
+          }),
+      }),
+      Postfix,
+  });
+  EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroCallReconstructorTest, UnusedMacroArguments) {
+  auto Macros = createExpander({"X=x"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("X", {"a", "b", "c"});
+
+  MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
+  Unexp.addLine(line(Exp.getTokens()));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call, Lex);
+  EXPECT_THAT(std::move(Unexp).takeResult(),
+              matchesLine(line(U.consume("X(a, b, c)"))));
+}
+
+TEST_F(MacroCallReconstructorTest, UnusedEmptyMacroArgument) {
+  auto Macros = createExpander({"X=x"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("X", {std::string("")});
+
+  MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens(), Lex);
+  auto Semi = tokens(";");
+  Unexp.addLine(line({E.consume("x"), Semi}));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call, Lex);
+  EXPECT_THAT(std::move(Unexp).takeResult(),
+              matchesLine(line({U.consume("X()"), Semi})));
+}
+
+TEST_F(MacroCallReconstructorTest, ChildrenSplitAcrossArguments) {
+  auto Macros = createExpander({"CALL(a, b)=f([]() a b)"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("CALL", {std::string("{ a;"), "b; }"});
+
+  MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens(), Lex);
+  Unexp.addLine(line({
+      E.consume("f([]() {"),
+      children({
+          line(E.consume("a;")),
+          line(E.consume("b;")),
+      }),
+      E.consume("})"),
+  }));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call, Lex);
+  auto Expected = line({
+      U.consume("CALL({"),
+      children(line(U.consume("a;"))),
+      U.consume(", b; })"),
+  });
+  EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroCallReconstructorTest, ChildrenAfterMacroCall) {
+  auto Macros = createExpander({"CALL(a, b)=f([]() a b"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("CALL", {std::string("{ a"), "b"});
+
+  MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens(), Lex);
+  auto Semi = tokens(";");
+  auto SecondLine = tokens("c d;");
+  auto ThirdLine = tokens("e f;");
+  auto Postfix = tokens("})");
+  Unexp.addLine(line({
+      E.consume("f([]() {"),
+      children({
+          line({E.consume("a b"), Semi}),
+          line(SecondLine),
+          line(ThirdLine),
+      }),
+      Postfix,
+  }));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call, Lex);
+  auto Expected = line({
+      U.consume("CALL({"),
+      children(line(U.consume("a"))),
+      U.consume(", b)"),
+      Semi,
+      children(line({
+          SecondLine,
+          children(line({
+              ThirdLine,
+              Postfix,
+          })),
+      })),
+  });
+  EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroCallReconstructorTest, InvalidCodeSplittingBracesAcrossArgs) {
+  auto Macros = createExpander({"M(a, b)=(a) (b)"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("M", {std::string("{"), "x", ""});
+
+  MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens(), Lex);
+  auto Prefix = tokens("({");
+  Unexp.addLine(line({
+      Prefix,
+      children({
+          line({
+              E.consume("({"),
+              children({line(E.consume(")(x)"))}),
+          }),
+      }),
+  }));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call, Lex);
+  auto Expected = line({
+      Prefix,
+      children({line(U.consume("M({,x,)"))}),
+  });
+  EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
+}
+
+} // namespace
+} // namespace format
+} // namespace clang


        


More information about the cfe-commits mailing list