[clang] d6d0dc1 - [clang-format] Add MacroUnexpander.
Manuel Klimek via cfe-commits
cfe-commits at lists.llvm.org
Tue Jul 12 00:12:12 PDT 2022
Author: Manuel Klimek
Date: 2022-07-12T07:11:46Z
New Revision: d6d0dc1f45377ddaf5c10a48d64b09308b71501a
URL: https://github.com/llvm/llvm-project/commit/d6d0dc1f45377ddaf5c10a48d64b09308b71501a
DIFF: https://github.com/llvm/llvm-project/commit/d6d0dc1f45377ddaf5c10a48d64b09308b71501a.diff
LOG: [clang-format] Add MacroUnexpander.
MacroUnexpander applies the structural formatting of expanded lines into
UnwrappedLines to the corresponding unexpanded macro calls, resulting in
UnwrappedLines for the macro calls the user typed.
Differential Revision: https://reviews.llvm.org/D88299
Added:
clang/lib/Format/MacroCallReconstructor.cpp
clang/unittests/Format/MacroCallReconstructorTest.cpp
Modified:
clang/lib/Format/CMakeLists.txt
clang/lib/Format/FormatToken.h
clang/lib/Format/Macros.h
clang/lib/Format/UnwrappedLineParser.h
clang/unittests/Format/CMakeLists.txt
Removed:
################################################################################
diff --git a/clang/lib/Format/CMakeLists.txt b/clang/lib/Format/CMakeLists.txt
index ca455157ae44a..4ea02ea72bc77 100644
--- a/clang/lib/Format/CMakeLists.txt
+++ b/clang/lib/Format/CMakeLists.txt
@@ -8,6 +8,7 @@ add_clang_library(clangFormat
Format.cpp
FormatToken.cpp
FormatTokenLexer.cpp
+ MacroCallReconstructor.cpp
MacroExpander.cpp
NamespaceEndCommentsFixer.cpp
QualifierAlignmentFixer.cpp
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index b6cc021affae3..73e32979853f5 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -497,6 +497,15 @@ struct FormatToken {
// in a configured macro expansion.
llvm::Optional<MacroExpansion> MacroCtx;
+ /// When macro expansion introduces nodes with children, those are marked as
+ /// \c MacroParent.
+ /// FIXME: The formatting code currently hard-codes the assumption that
+ /// child nodes are introduced by blocks following an opening brace.
+ /// This is deeply baked into the code and disentangling this will require
+ /// signficant refactorings. \c MacroParent allows us to special-case the
+ /// cases in which we treat parents as block-openers for now.
+ bool MacroParent = false;
+
bool is(tok::TokenKind Kind) const { return Tok.is(Kind); }
bool is(TokenType TT) const { return getType() == TT; }
bool is(const IdentifierInfo *II) const {
diff --git a/clang/lib/Format/MacroCallReconstructor.cpp b/clang/lib/Format/MacroCallReconstructor.cpp
new file mode 100644
index 0000000000000..67711cc91d0b8
--- /dev/null
+++ b/clang/lib/Format/MacroCallReconstructor.cpp
@@ -0,0 +1,569 @@
+//===--- MacroCallReconstructor.cpp - Format C++ code -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the implementation of MacroCallReconstructor, which fits
+/// an reconstructed macro call to a parsed set of UnwrappedLines.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Macros.h"
+
+#include "UnwrappedLineParser.h"
+#include "clang/Basic/TokenKinds.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Support/Debug.h"
+#include <cassert>
+
+#define DEBUG_TYPE "format-reconstruct"
+
+namespace clang {
+namespace format {
+
+// Call \p Call for each token in the unwrapped line given, passing
+// the token, its parent and whether it is the first token in the line.
+template <typename T>
+void forEachToken(const UnwrappedLine &Line, const T &Call,
+ FormatToken *Parent = nullptr) {
+ bool First = true;
+ for (const auto &N : Line.Tokens) {
+ Call(N.Tok, Parent, First);
+ First = false;
+ for (const auto &Child : N.Children) {
+ forEachToken(Child, Call, N.Tok);
+ }
+ }
+}
+
+MacroCallReconstructor::MacroCallReconstructor(
+ unsigned Level,
+ const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
+ &ActiveExpansions)
+ : Level(Level), IdToReconstructed(ActiveExpansions) {
+ Result.Tokens.push_back(std::make_unique<LineNode>());
+ ActiveReconstructedLines.push_back(&Result);
+}
+
+void MacroCallReconstructor::addLine(const UnwrappedLine &Line) {
+ assert(State != Finalized);
+ LLVM_DEBUG(llvm::dbgs() << "MCR: new line...\n");
+ forEachToken(Line, [&](FormatToken *Token, FormatToken *Parent, bool First) {
+ add(Token, Parent, First);
+ });
+ assert(InProgress || finished());
+}
+
+UnwrappedLine MacroCallReconstructor::takeResult() && {
+ finalize();
+ assert(Result.Tokens.size() == 1 && Result.Tokens.front()->Children.size() == 1);
+ UnwrappedLine Final =
+ createUnwrappedLine(*Result.Tokens.front()->Children.front(), Level);
+ assert(!Final.Tokens.empty());
+ return Final;
+}
+
+// Reconstruct the position of the next \p Token, given its parent \p
+// ExpandedParent in the incoming unwrapped line. \p First specifies whether it
+// is the first token in a given unwrapped line.
+void MacroCallReconstructor::add(FormatToken *Token,
+ FormatToken *ExpandedParent, bool First) {
+ LLVM_DEBUG(
+ llvm::dbgs() << "MCR: Token: " << Token->TokenText << ", Parent: "
+ << (ExpandedParent ? ExpandedParent->TokenText : "<null>")
+ << ", First: " << First << "\n");
+ // In order to be able to find the correct parent in the reconstructed token
+ // stream, we need to continue the last open reconstruction until we find the
+ // given token if it is part of the reconstructed token stream.
+ //
+ // Note that hidden tokens can be part of the reconstructed stream in nested
+ // macro calls.
+ // For example, given
+ // #define C(x, y) x y
+ // #define B(x) {x}
+ // And the call:
+ // C(a, B(b))
+ // The outer macro call will be C(a, {b}), and the hidden token '}' can be
+ // found in the reconstructed token stream of that expansion level.
+ // In the expanded token stream
+ // a {b}
+ // 'b' is a child of '{'. We need to continue the open expansion of the ','
+ // in the call of 'C' in order to correctly set the ',' as the parent of '{',
+ // so we later set the spelled token 'b' as a child of the ','.
+ if (!ActiveExpansions.empty() && Token->MacroCtx &&
+ (Token->MacroCtx->Role != MR_Hidden ||
+ ActiveExpansions.size() != Token->MacroCtx->ExpandedFrom.size())) {
+ if (bool PassedMacroComma = reconstructActiveCallUntil(Token))
+ First = true;
+ }
+
+ prepareParent(ExpandedParent, First);
+
+ if (Token->MacroCtx) {
+ // If this token was generated by a macro call, add the reconstructed
+ // equivalent of the token.
+ reconstruct(Token);
+ } else {
+ // Otherwise, we add it to the current line.
+ appendToken(Token);
+ }
+}
+
+// Adjusts the stack of active reconstructed lines so we're ready to push
+// tokens. The tokens to be pushed are children of ExpandedParent in the
+// expanded code.
+//
+// This may entail:
+// - creating a new line, if the parent is on the active line
+// - popping active lines, if the parent is further up the stack
+//
+// Postcondition:
+// ActiveReconstructedLines.back() is the line that has \p ExpandedParent or its
+// reconstructed replacement token as a parent (when possible) - that is, the
+// last token in \c ActiveReconstructedLines[ActiveReconstructedLines.size()-2]
+// is the parent of ActiveReconstructedLines.back() in the reconstructed
+// unwrapped line.
+void MacroCallReconstructor::prepareParent(FormatToken *ExpandedParent,
+ bool NewLine) {
+ LLVM_DEBUG({
+ llvm::dbgs() << "ParentMap:\n";
+ debugParentMap();
+ });
+ // We want to find the parent in the new unwrapped line, where the expanded
+ // parent might have been replaced during reconstruction.
+ FormatToken *Parent = getParentInResult(ExpandedParent);
+ LLVM_DEBUG(llvm::dbgs() << "MCR: New parent: "
+ << (Parent ? Parent->TokenText : "<null>") << "\n");
+
+ FormatToken *OpenMacroParent = nullptr;
+ if (!MacroCallStructure.empty()) {
+ // Inside a macro expansion, it is possible to lose track of the correct
+ // parent - either because it is already popped, for example because it was
+ // in a
diff erent macro argument (e.g. M({, })), or when we work on invalid
+ // code.
+ // Thus, we use the innermost macro call's parent as the parent at which
+ // we stop; this allows us to stay within the macro expansion and keeps
+ // any problems confined to the extent of the macro call.
+ OpenMacroParent =
+ getParentInResult(MacroCallStructure.back().MacroCallLParen);
+ LLVM_DEBUG(llvm::dbgs()
+ << "MacroCallLParen: "
+ << MacroCallStructure.back().MacroCallLParen->TokenText
+ << ", OpenMacroParent: "
+ << (OpenMacroParent ? OpenMacroParent->TokenText : "<null>")
+ << "\n");
+ }
+ if (NewLine ||
+ (!ActiveReconstructedLines.back()->Tokens.empty() &&
+ Parent == ActiveReconstructedLines.back()->Tokens.back()->Tok)) {
+ // If we are at the first token in a new line, we want to also
+ // create a new line in the resulting reconstructed unwrapped line.
+ while (ActiveReconstructedLines.back()->Tokens.empty() ||
+ (Parent != ActiveReconstructedLines.back()->Tokens.back()->Tok &&
+ ActiveReconstructedLines.back()->Tokens.back()->Tok !=
+ OpenMacroParent)) {
+ ActiveReconstructedLines.pop_back();
+ assert(!ActiveReconstructedLines.empty());
+ }
+ assert(!ActiveReconstructedLines.empty());
+ ActiveReconstructedLines.back()->Tokens.back()->Children.push_back(
+ std::make_unique<Line>());
+ ActiveReconstructedLines.push_back(
+ &*ActiveReconstructedLines.back()->Tokens.back()->Children.back());
+ } else if (parentLine().Tokens.back()->Tok != Parent) {
+ // If we're not the first token in a new line, pop lines until we find
+ // the child of \c Parent in the stack.
+ while (Parent != parentLine().Tokens.back()->Tok &&
+ parentLine().Tokens.back()->Tok &&
+ parentLine().Tokens.back()->Tok != OpenMacroParent) {
+ ActiveReconstructedLines.pop_back();
+ assert(!ActiveReconstructedLines.empty());
+ }
+ }
+ assert(!ActiveReconstructedLines.empty());
+}
+
+// For a given \p Parent in the incoming expanded token stream, find the
+// corresponding parent in the output.
+FormatToken *MacroCallReconstructor::getParentInResult(FormatToken *Parent) {
+ FormatToken *Mapped = SpelledParentToReconstructedParent.lookup(Parent);
+ if (!Mapped)
+ return Parent;
+ for (; Mapped; Mapped = SpelledParentToReconstructedParent.lookup(Parent)) {
+ Parent = Mapped;
+ }
+ // If we use a
diff erent token than the parent in the expanded token stream
+ // as parent, mark it as a special parent, so the formatting code knows it
+ // needs to have its children formatted.
+ Parent->MacroParent = true;
+ return Parent;
+}
+
+// Reconstruct a \p Token that was expanded from a macro call.
+void MacroCallReconstructor::reconstruct(FormatToken *Token) {
+ assert(Token->MacroCtx);
+ // A single token can be the only result of a macro call:
+ // Given: #define ID(x, y) ;
+ // And the call: ID(<some>, <tokens>)
+ // ';' in the expanded stream will reconstruct all of ID(<some>, <tokens>).
+ if (Token->MacroCtx->StartOfExpansion) {
+ startReconstruction(Token);
+ // If the order of tokens in the expanded token stream is not the
+ // same as the order of tokens in the reconstructed stream, we need
+ // to reconstruct tokens that arrive later in the stream.
+ if (Token->MacroCtx->Role != MR_Hidden) {
+ reconstructActiveCallUntil(Token);
+ }
+ }
+ assert(!ActiveExpansions.empty());
+ if (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE) {
+ assert(ActiveExpansions.size() == Token->MacroCtx->ExpandedFrom.size());
+ if (Token->MacroCtx->Role != MR_Hidden) {
+ // The current token in the reconstructed token stream must be the token
+ // we're looking for - we either arrive here after startReconstruction,
+ // which initiates the stream to the first token, or after
+ // continueReconstructionUntil skipped until the expected token in the
+ // reconstructed stream at the start of add(...).
+ assert(ActiveExpansions.back().SpelledI->Tok == Token);
+ processNextReconstructed();
+ } else if (!currentLine()->Tokens.empty()) {
+ // Map all hidden tokens to the last visible token in the output.
+ // If the hidden token is a parent, we'll use the last visible
+ // token as the parent of the hidden token's children.
+ SpelledParentToReconstructedParent[Token] =
+ currentLine()->Tokens.back()->Tok;
+ } else {
+ for (auto I = ActiveReconstructedLines.rbegin(),
+ E = ActiveReconstructedLines.rend();
+ I != E; ++I) {
+ if (!(*I)->Tokens.empty()) {
+ SpelledParentToReconstructedParent[Token] = (*I)->Tokens.back()->Tok;
+ break;
+ }
+ }
+ }
+ }
+ if (Token->MacroCtx->EndOfExpansion)
+ endReconstruction(Token);
+}
+
+// Given a \p Token that starts an expansion, reconstruct the beginning of the
+// macro call.
+// For example, given: #define ID(x) x
+// And the call: ID(int a)
+// Reconstructs: ID(
+void MacroCallReconstructor::startReconstruction(FormatToken *Token) {
+ assert(Token->MacroCtx);
+ assert(!Token->MacroCtx->ExpandedFrom.empty());
+ assert(ActiveExpansions.size() <= Token->MacroCtx->ExpandedFrom.size());
+#ifndef NDEBUG
+ // Check that the token's reconstruction stack matches our current
+ // reconstruction stack.
+ for (size_t I = 0; I < ActiveExpansions.size(); ++I) {
+ assert(ActiveExpansions[I].ID ==
+ Token->MacroCtx
+ ->ExpandedFrom[Token->MacroCtx->ExpandedFrom.size() - 1 - I]);
+ }
+#endif
+ // Start reconstruction for all calls for which this token is the first token
+ // generated by the call.
+ // Note that the token's expanded from stack is inside-to-outside, and the
+ // expansions for which this token is not the first are the outermost ones.
+ ArrayRef<FormatToken *> StartedMacros =
+ makeArrayRef(Token->MacroCtx->ExpandedFrom)
+ .drop_back(ActiveExpansions.size());
+ assert(StartedMacros.size() == Token->MacroCtx->StartOfExpansion);
+ // We reconstruct macro calls outside-to-inside.
+ for (FormatToken *ID : llvm::reverse(StartedMacros)) {
+ // We found a macro call to be reconstructed; the next time our
+ // reconstruction stack is empty we know we finished an reconstruction.
+#ifndef NDEBUG
+ State = InProgress;
+#endif
+ // Put the reconstructed macro call's token into our reconstruction stack.
+ auto IU = IdToReconstructed.find(ID);
+ assert(IU != IdToReconstructed.end());
+ ActiveExpansions.push_back(
+ {ID, IU->second->Tokens.begin(), IU->second->Tokens.end()});
+ // Process the macro call's identifier.
+ processNextReconstructed();
+ if (ActiveExpansions.back().SpelledI == ActiveExpansions.back().SpelledE)
+ continue;
+ if (ActiveExpansions.back().SpelledI->Tok->is(tok::l_paren)) {
+ // Process the optional opening parenthesis.
+ processNextReconstructed();
+ }
+ }
+}
+
+// Add all tokens in the reconstruction stream to the output until we find the
+// given \p Token.
+bool MacroCallReconstructor::reconstructActiveCallUntil(FormatToken *Token) {
+ assert(!ActiveExpansions.empty());
+ bool PassedMacroComma = false;
+ // FIXME: If Token was already expanded earlier, due to
+ // a change in order, we will not find it, but need to
+ // skip it.
+ while (ActiveExpansions.back().SpelledI != ActiveExpansions.back().SpelledE &&
+ ActiveExpansions.back().SpelledI->Tok != Token) {
+ PassedMacroComma = processNextReconstructed() || PassedMacroComma;
+ }
+ return PassedMacroComma;
+}
+
+// End all reconstructions for which \p Token is the final token.
+void MacroCallReconstructor::endReconstruction(FormatToken *Token) {
+ assert(Token->MacroCtx &&
+ (ActiveExpansions.size() >= Token->MacroCtx->EndOfExpansion));
+ for (size_t I = 0; I < Token->MacroCtx->EndOfExpansion; ++I) {
+#ifndef NDEBUG
+ // Check all remaining tokens but the final closing parenthesis and optional
+ // trailing comment were already reconstructed at an inner expansion level.
+ for (auto T = ActiveExpansions.back().SpelledI;
+ T != ActiveExpansions.back().SpelledE; ++T) {
+ FormatToken *Token = T->Tok;
+ bool ClosingParen = (std::next(T) == ActiveExpansions.back().SpelledE ||
+ std::next(T)->Tok->isTrailingComment()) &&
+ !Token->MacroCtx && Token->is(tok::r_paren);
+ bool TrailingComment = Token->isTrailingComment();
+ bool PreviousLevel =
+ Token->MacroCtx &&
+ (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size());
+ if (!ClosingParen && !TrailingComment && !PreviousLevel) {
+ llvm::dbgs() << "At token: " << Token->TokenText << "\n";
+ }
+ // In addition to the following cases, we can also run into this
+ // when a macro call had more arguments than expected; in that case,
+ // the comma and the remaining tokens in the macro call will potentially
+ // end up in the line when we finish the expansion.
+ // FIXME: Add the information which arguments are unused, and assert
+ // one of the cases below plus reconstructed macro argument tokens.
+ // assert(ClosingParen || TrailingComment || PreviousLevel);
+ }
+#endif
+ // Handle the remaining open tokens:
+ // - expand the closing parenthesis, if it exists, including an optional
+ // trailing comment
+ // - handle tokens that were already reconstructed at an inner expansion
+ // level
+ // - handle tokens when a macro call had more than the expected number of
+ // arguments, i.e. when #define M(x) is called as M(a, b, c) we'll end
+ // up with the sequence ", b, c)" being open at the end of the
+ // reconstruction; we want to gracefully handle that case
+ //
+ // FIXME: See the above debug-check for what we will need to do to be
+ // able to assert this.
+ for (auto T = ActiveExpansions.back().SpelledI;
+ T != ActiveExpansions.back().SpelledE; ++T) {
+ processNextReconstructed();
+ }
+ ActiveExpansions.pop_back();
+ }
+}
+
+void MacroCallReconstructor::debugParentMap() const {
+ llvm::DenseSet<FormatToken *> Values;
+ for (const auto &P : SpelledParentToReconstructedParent)
+ Values.insert(P.second);
+
+ for (const auto &P : SpelledParentToReconstructedParent) {
+ if (Values.contains(P.first))
+ continue;
+ llvm::dbgs() << (P.first ? P.first->TokenText : "<null>");
+ for (auto I = SpelledParentToReconstructedParent.find(P.first),
+ E = SpelledParentToReconstructedParent.end();
+ I != E; I = SpelledParentToReconstructedParent.find(I->second)) {
+ llvm::dbgs() << " -> " << (I->second ? I->second->TokenText : "<null>");
+ }
+ llvm::dbgs() << "\n";
+ }
+}
+
+// If visible, add the next token of the reconstructed token sequence to the
+// output. Returns whether reconstruction passed a comma that is part of a
+// macro call.
+bool MacroCallReconstructor::processNextReconstructed() {
+ FormatToken *Token = ActiveExpansions.back().SpelledI->Tok;
+ ++ActiveExpansions.back().SpelledI;
+ if (Token->MacroCtx) {
+ // Skip tokens that are not part of the macro call.
+ if (Token->MacroCtx->Role == MR_Hidden) {
+ return false;
+ }
+ // Skip tokens we already expanded during an inner reconstruction.
+ // For example, given: #define ID(x) {x}
+ // And the call: ID(ID(f))
+ // We get two reconstructions:
+ // ID(f) -> {f}
+ // ID({f}) -> {{f}}
+ // We reconstruct f during the first reconstruction, and skip it during the
+ // second reconstruction.
+ if (ActiveExpansions.size() < Token->MacroCtx->ExpandedFrom.size()) {
+ return false;
+ }
+ }
+ // Tokens that do not have a macro context are tokens in that are part of the
+ // macro call that have not taken part in expansion.
+ if (!Token->MacroCtx) {
+ // Put the parentheses and commas of a macro call into the same line;
+ // if the arguments produce new unwrapped lines, they will become children
+ // of the corresponding opening parenthesis or comma tokens in the
+ // reconstructed call.
+ if (Token->is(tok::l_paren)) {
+ MacroCallStructure.push_back(MacroCallState(
+ currentLine(), parentLine().Tokens.back()->Tok, Token));
+ // All tokens that are children of the previous line's last token in the
+ // reconstructed token stream will now be children of the l_paren token.
+ // For example, for the line containing the macro calls:
+ // auto x = ID({ID(2)});
+ // We will build up a map <null> -> ( -> ( with the first and second
+ // l_paren of the macro call respectively. New lines that come in with a
+ // <null> parent will then become children of the l_paren token of the
+ // currently innermost macro call.
+ SpelledParentToReconstructedParent[MacroCallStructure.back()
+ .ParentLastToken] = Token;
+ appendToken(Token);
+ prepareParent(Token, /*NewLine=*/true);
+ Token->MacroParent = true;
+ return false;
+ }
+ if (!MacroCallStructure.empty()) {
+ if (Token->is(tok::comma)) {
+ // Make new lines inside the next argument children of the comma token.
+ SpelledParentToReconstructedParent
+ [MacroCallStructure.back().Line->Tokens.back()->Tok] = Token;
+ Token->MacroParent = true;
+ appendToken(Token, MacroCallStructure.back().Line);
+ prepareParent(Token, /*NewLine=*/true);
+ return true;
+ }
+ if (Token->is(tok::r_paren)) {
+ appendToken(Token, MacroCallStructure.back().Line);
+ SpelledParentToReconstructedParent.erase(
+ MacroCallStructure.back().ParentLastToken);
+ MacroCallStructure.pop_back();
+ return false;
+ }
+ }
+ }
+ // Note that any tokens that are tagged with MR_None have been passed as
+ // arguments to the macro that have not been expanded, for example:
+ // Given: #define ID(X) x
+ // When calling: ID(a, b)
+ // 'b' will be part of the reconstructed token stream, but tagged MR_None.
+ // Given that erroring out in this case would be disruptive, we continue
+ // pushing the (unformatted) token.
+ // FIXME: This can lead to unfortunate formatting decisions - give the user
+ // a hint that their macro definition is broken.
+ appendToken(Token);
+ return false;
+}
+
+void MacroCallReconstructor::finalize() {
+#ifndef NDEBUG
+ assert(State != Finalized && finished());
+ State = Finalized;
+#endif
+
+ // We created corresponding unwrapped lines for each incoming line as children
+ // the the toplevel null token.
+ assert(Result.Tokens.size() == 1 && !Result.Tokens.front()->Children.empty());
+ LLVM_DEBUG({
+ llvm::dbgs() << "Finalizing reconstructed lines:\n";
+ debug(Result, 0);
+ });
+
+ // The first line becomes the top level line in the resulting unwrapped line.
+ LineNode &Top = *Result.Tokens.front();
+ auto *I = Top.Children.begin();
+ // Every subsequent line will become a child of the last token in the previous
+ // line, which is the token prior to the first token in the line.
+ LineNode *Last = (*I)->Tokens.back().get();
+ ++I;
+ for (auto *E = Top.Children.end(); I != E; ++I) {
+ assert(Last->Children.empty());
+ Last->Children.push_back(std::move(*I));
+
+ // Mark the previous line's last token as generated by a macro expansion
+ // so the formatting algorithm can take that into account.
+ Last->Tok->MacroParent = true;
+
+ Last = Last->Children.back()->Tokens.back().get();
+ }
+ Top.Children.resize(1);
+}
+
+void MacroCallReconstructor::appendToken(FormatToken *Token, Line *L) {
+ L = L ? L : currentLine();
+ LLVM_DEBUG(llvm::dbgs() << "-> " << Token->TokenText << "\n");
+ L->Tokens.push_back(std::make_unique<LineNode>(Token));
+}
+
+UnwrappedLine MacroCallReconstructor::createUnwrappedLine(const Line &Line,
+ int Level) {
+ UnwrappedLine Result;
+ Result.Level = Level;
+ for (const auto &N : Line.Tokens) {
+ Result.Tokens.push_back(N->Tok);
+ UnwrappedLineNode &Current = Result.Tokens.back();
+ for (const auto &Child : N->Children) {
+ if (Child->Tokens.empty())
+ continue;
+ Current.Children.push_back(createUnwrappedLine(*Child, Level + 1));
+ }
+ if (Current.Children.size() == 1 &&
+ Current.Tok->isOneOf(tok::l_paren, tok::comma)) {
+ Result.Tokens.splice(Result.Tokens.end(),
+ Current.Children.front().Tokens);
+ Current.Children.clear();
+ }
+ }
+ return Result;
+}
+
+void MacroCallReconstructor::debug(const Line &Line, int Level) {
+ for (int i = 0; i < Level; ++i)
+ llvm::dbgs() << " ";
+ for (const auto &N : Line.Tokens) {
+ if (!N)
+ continue;
+ if (N->Tok)
+ llvm::dbgs() << N->Tok->TokenText << " ";
+ for (const auto &Child : N->Children) {
+ llvm::dbgs() << "\n";
+ debug(*Child, Level + 1);
+ for (int i = 0; i < Level; ++i)
+ llvm::dbgs() << " ";
+ }
+ }
+ llvm::dbgs() << "\n";
+}
+
+MacroCallReconstructor::Line &MacroCallReconstructor::parentLine() {
+ return **std::prev(std::prev(ActiveReconstructedLines.end()));
+}
+
+MacroCallReconstructor::Line *MacroCallReconstructor::currentLine() {
+ return ActiveReconstructedLines.back();
+}
+
+MacroCallReconstructor::MacroCallState::MacroCallState(
+ MacroCallReconstructor::Line *Line, FormatToken *ParentLastToken,
+ FormatToken *MacroCallLParen)
+ : Line(Line), ParentLastToken(ParentLastToken),
+ MacroCallLParen(MacroCallLParen) {
+ LLVM_DEBUG(
+ llvm::dbgs() << "ParentLastToken: "
+ << (ParentLastToken ? ParentLastToken->TokenText : "<null>")
+ << "\n");
+
+ assert(MacroCallLParen->is(tok::l_paren));
+}
+
+} // namespace format
+} // namespace clang
diff --git a/clang/lib/Format/Macros.h b/clang/lib/Format/Macros.h
index da03beb091454..59774647a5694 100644
--- a/clang/lib/Format/Macros.h
+++ b/clang/lib/Format/Macros.h
@@ -1,4 +1,4 @@
-//===--- MacroExpander.h - Format C++ code ----------------------*- C++ -*-===//
+//===--- Macros.h - Format C++ code -----------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -22,40 +22,38 @@
/// spelled token streams into expanded token streams when it encounters a
/// macro call. The UnwrappedLineParser continues to parse UnwrappedLines
/// from the expanded token stream.
-/// After the expanded unwrapped lines are parsed, the MacroUnexpander matches
-/// the spelled token stream into unwrapped lines that best resemble the
-/// structure of the expanded unwrapped lines.
+/// After the expanded unwrapped lines are parsed, the MacroCallReconstructor
+/// matches the spelled token stream into unwrapped lines that best resemble the
+/// structure of the expanded unwrapped lines. These reconstructed unwrapped
+/// lines are aliasing the tokens in the expanded token stream, so that token
+/// annotations will be reused when formatting the spelled macro calls.
///
-/// When formatting, clang-format formats the expanded unwrapped lines first,
-/// determining the token types. Next, it formats the spelled unwrapped lines,
-/// keeping the token types fixed, while allowing other formatting decisions
-/// to change.
+/// When formatting, clang-format annotates and formats the expanded unwrapped
+/// lines first, determining the token types. Next, it formats the spelled
+/// unwrapped lines, keeping the token types fixed, while allowing other
+/// formatting decisions to change.
///
//===----------------------------------------------------------------------===//
#ifndef CLANG_LIB_FORMAT_MACROS_H
#define CLANG_LIB_FORMAT_MACROS_H
+#include <list>
+#include <map>
#include <string>
-#include <unordered_map>
#include <vector>
-#include "Encoding.h"
#include "FormatToken.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-namespace llvm {
-class MemoryBuffer;
-} // namespace llvm
-
namespace clang {
-class IdentifierTable;
-class SourceManager;
-
namespace format {
-struct FormatStyle;
+
+struct UnwrappedLine;
+struct UnwrappedLineNode;
/// Takes a set of macro definitions as strings and allows expanding calls to
/// those macros.
@@ -134,6 +132,249 @@ class MacroExpander {
llvm::StringMap<Definition> Definitions;
};
+/// Converts a sequence of UnwrappedLines containing expanded macros into a
+/// single UnwrappedLine containing the macro calls. This UnwrappedLine may be
+/// broken into child lines, in a way that best conveys the structure of the
+/// expanded code.
+///
+/// In the simplest case, a spelled UnwrappedLine contains one macro, and after
+/// expanding it we have one expanded UnwrappedLine. In general, macro
+/// expansions can span UnwrappedLines, and multiple macros can contribute
+/// tokens to the same line. We keep consuming expanded lines until:
+/// * all expansions that started have finished (we're not chopping any macros
+/// in half)
+/// * *and* we've reached the end of a *spelled* unwrapped line.
+///
+/// A single UnwrappedLine represents this chunk of code.
+///
+/// After this point, the state of the spelled/expanded stream is "in sync"
+/// (both at the start of an UnwrappedLine, with no macros open), so the
+/// Unexpander can be thrown away and parsing can continue.
+///
+/// Given a mapping from the macro name identifier token in the macro call
+/// to the tokens of the macro call, for example:
+/// CLASSA -> CLASSA({public: void x();})
+///
+/// When getting the formatted lines of the expansion via the \c addLine method
+/// (each '->' specifies a call to \c addLine ):
+/// -> class A {
+/// -> public:
+/// -> void x();
+/// -> };
+///
+/// Creates the tree of unwrapped lines containing the macro call tokens so that
+/// the macro call tokens fit the semantic structure of the expanded formatted
+/// lines:
+/// -> CLASSA({
+/// -> public:
+/// -> void x();
+/// -> })
+class MacroCallReconstructor {
+public:
+ /// Create an Reconstructor whose resulting \p UnwrappedLine will start at
+ /// \p Level, using the map from name identifier token to the corresponding
+ /// tokens of the spelled macro call.
+ MacroCallReconstructor(
+ unsigned Level,
+ const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
+ &ActiveExpansions);
+
+ /// For the given \p Line, match all occurences of tokens expanded from a
+ /// macro to unwrapped lines in the spelled macro call so that the resulting
+ /// tree of unwrapped lines best resembles the structure of unwrapped lines
+ /// passed in via \c addLine.
+ void addLine(const UnwrappedLine &Line);
+
+ /// Check whether at the current state there is no open macro expansion
+ /// that needs to be processed to finish an macro call.
+ /// Only when \c finished() is true, \c takeResult() can be called to retrieve
+ /// the resulting \c UnwrappedLine.
+ /// If there are multiple subsequent macro calls within an unwrapped line in
+ /// the spelled token stream, the calling code may also continue to call
+ /// \c addLine() when \c finished() is true.
+ bool finished() const { return ActiveExpansions.empty(); }
+
+ /// Retrieve the formatted \c UnwrappedLine containing the orginal
+ /// macro calls, formatted according to the expanded token stream received
+ /// via \c addLine().
+ /// Generally, this line tries to have the same structure as the expanded,
+ /// formatted unwrapped lines handed in via \c addLine(), with the exception
+ /// that for multiple top-level lines, each subsequent line will be the
+ /// child of the last token in its predecessor. This representation is chosen
+ /// because it is a precondition to the formatter that we get what looks like
+ /// a single statement in a single \c UnwrappedLine (i.e. matching parens).
+ ///
+ /// If a token in a macro argument is a child of a token in the expansion,
+ /// the parent will be the corresponding token in the macro call.
+ /// For example:
+ /// #define C(a, b) class C { a b
+ /// C(int x;, int y;)
+ /// would expand to
+ /// class C { int x; int y;
+ /// where in a formatted line "int x;" and "int y;" would both be new separate
+ /// lines.
+ ///
+ /// In the result, "int x;" will be a child of the opening parenthesis in "C("
+ /// and "int y;" will be a child of the "," token:
+ /// C (
+ /// \- int x;
+ /// ,
+ /// \- int y;
+ /// )
+ UnwrappedLine takeResult() &&;
+
+private:
+ void add(FormatToken *Token, FormatToken *ExpandedParent, bool First);
+ void prepareParent(FormatToken *ExpandedParent, bool First);
+ FormatToken *getParentInResult(FormatToken *Parent);
+ void reconstruct(FormatToken *Token);
+ void startReconstruction(FormatToken *Token);
+ bool reconstructActiveCallUntil(FormatToken *Token);
+ void endReconstruction(FormatToken *Token);
+ bool processNextReconstructed();
+ void finalize();
+
+ struct Line;
+
+ void appendToken(FormatToken *Token, Line *L = nullptr);
+ UnwrappedLine createUnwrappedLine(const Line &Line, int Level);
+ void debug(const Line &Line, int Level);
+ Line &parentLine();
+ Line *currentLine();
+ void debugParentMap() const;
+
+#ifndef NDEBUG
+ enum ReconstructorState {
+ Start, // No macro expansion was found in the input yet.
+ InProgress, // During a macro reconstruction.
+ Finalized, // Past macro reconstruction, the result is finalized.
+ };
+ ReconstructorState State = Start;
+#endif
+
+ // Node in which we build up the resulting unwrapped line; this type is
+ // analogous to UnwrappedLineNode.
+ struct LineNode {
+ LineNode() = default;
+ LineNode(FormatToken *Tok) : Tok(Tok) {}
+ FormatToken *Tok = nullptr;
+ llvm::SmallVector<std::unique_ptr<Line>> Children;
+ };
+
+ // Line in which we build up the resulting unwrapped line.
+ // FIXME: Investigate changing UnwrappedLine to a pointer type and using it
+ // instead of rolling our own type.
+ struct Line {
+ llvm::SmallVector<std::unique_ptr<LineNode>> Tokens;
+ };
+
+ // The line in which we collect the resulting reconstructed output.
+ // To reduce special cases in the algorithm, the first level of the line
+ // contains a single null token that has the reconstructed incoming
+ // lines as children.
+ // In the end, we stich the lines together so that each subsequent line
+ // is a child of the last token of the previous line. This is necessary
+ // in order to format the overall expression as a single logical line -
+ // if we created separate lines, we'd format them with their own top-level
+ // indent depending on the semantic structure, which is not desired.
+ Line Result;
+
+ // Stack of currently "open" lines, where each line's predecessor's last
+ // token is the parent token for that line.
+ llvm::SmallVector<Line *> ActiveReconstructedLines;
+
+ // Maps from the expanded token to the token that takes its place in the
+ // reconstructed token stream in terms of parent-child relationships.
+ // Note that it might take multiple steps to arrive at the correct
+ // parent in the output.
+ // Given: #define C(a, b) []() { a; b; }
+ // And a call: C(f(), g())
+ // The structure in the incoming formatted unwrapped line will be:
+ // []() {
+ // |- f();
+ // \- g();
+ // }
+ // with f and g being children of the opening brace.
+ // In the reconstructed call:
+ // C(f(), g())
+ // \- f()
+ // \- g()
+ // We want f to be a child of the opening parenthesis and g to be a child
+ // of the comma token in the macro call.
+ // Thus, we map
+ // { -> (
+ // and add
+ // ( -> ,
+ // once we're past the comma in the reconstruction.
+ llvm::DenseMap<FormatToken *, FormatToken *>
+ SpelledParentToReconstructedParent;
+
+ // Keeps track of a single expansion while we're reconstructing tokens it
+ // generated.
+ struct Expansion {
+ // The identifier token of the macro call.
+ FormatToken *ID;
+ // Our current position in the reconstruction.
+ std::list<UnwrappedLineNode>::iterator SpelledI;
+ // The end of the reconstructed token sequence.
+ std::list<UnwrappedLineNode>::iterator SpelledE;
+ };
+
+ // Stack of macro calls for which we're in the middle of an expansion.
+ llvm::SmallVector<Expansion> ActiveExpansions;
+
+ struct MacroCallState {
+ MacroCallState(Line *Line, FormatToken *ParentLastToken,
+ FormatToken *MacroCallLParen);
+
+ Line *Line;
+
+ // The last token in the parent line or expansion, or nullptr if the macro
+ // expansion is on a top-level line.
+ //
+ // For example, in the macro call:
+ // auto f = []() { ID(1); };
+ // The MacroCallState for ID will have '{' as ParentLastToken.
+ //
+ // In the macro call:
+ // ID(ID(void f()));
+ // The MacroCallState of the outer ID will have nullptr as ParentLastToken,
+ // while the MacroCallState for the inner ID will have the '(' of the outer
+ // ID as ParentLastToken.
+ //
+ // In the macro call:
+ // ID2(a, ID(b));
+ // The MacroCallState of ID will have ',' as ParentLastToken.
+ FormatToken *ParentLastToken;
+
+ // The l_paren of this MacroCallState's macro call.
+ FormatToken *MacroCallLParen;
+ };
+
+ // Keeps track of the lines into which the opening brace/parenthesis &
+ // argument separating commas for each level in the macro call go in order to
+ // put the corresponding closing brace/parenthesis into the same line in the
+ // output and keep track of which parents in the expanded token stream map to
+ // which tokens in the reconstructed stream.
+ // When an opening brace/parenthesis has children, we want the structure of
+ // the output line to be:
+ // |- MACRO
+ // |- (
+ // | \- <argument>
+ // |- ,
+ // | \- <argument>
+ // \- )
+ llvm::SmallVector<MacroCallState> MacroCallStructure;
+
+ // Level the generated UnwrappedLine will be at.
+ const unsigned Level;
+
+ // Maps from identifier of the macro call to an unwrapped line containing
+ // all tokens of the macro call.
+ const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
+ &IdToReconstructed;
+};
+
} // namespace format
} // namespace clang
diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h
index 8f63870412d00..3394bfab8b8e8 100644
--- a/clang/lib/Format/UnwrappedLineParser.h
+++ b/clang/lib/Format/UnwrappedLineParser.h
@@ -20,6 +20,7 @@
#include "clang/Format/Format.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/Support/Regex.h"
+#include <list>
#include <stack>
#include <vector>
@@ -38,7 +39,7 @@ struct UnwrappedLine {
UnwrappedLine();
/// The \c Tokens comprising this \c UnwrappedLine.
- std::vector<UnwrappedLineNode> Tokens;
+ std::list<UnwrappedLineNode> Tokens;
/// The indent level of the \c UnwrappedLine.
unsigned Level;
diff --git a/clang/unittests/Format/CMakeLists.txt b/clang/unittests/Format/CMakeLists.txt
index 9cc6c7a96af59..715aec4ed5853 100644
--- a/clang/unittests/Format/CMakeLists.txt
+++ b/clang/unittests/Format/CMakeLists.txt
@@ -18,6 +18,7 @@ add_clang_unittest(FormatTests
FormatTestTableGen.cpp
FormatTestTextProto.cpp
FormatTestVerilog.cpp
+ MacroCallReconstructorTest.cpp
MacroExpanderTest.cpp
NamespaceEndCommentsFixerTest.cpp
QualifierFixerTest.cpp
diff --git a/clang/unittests/Format/MacroCallReconstructorTest.cpp b/clang/unittests/Format/MacroCallReconstructorTest.cpp
new file mode 100644
index 0000000000000..2bda62aa42be1
--- /dev/null
+++ b/clang/unittests/Format/MacroCallReconstructorTest.cpp
@@ -0,0 +1,688 @@
+#include "../../lib/Format/Macros.h"
+#include "../../lib/Format/UnwrappedLineParser.h"
+#include "TestLexer.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <map>
+#include <memory>
+#include <vector>
+
+namespace clang {
+namespace format {
+namespace {
+
+using UnexpandedMap =
+ llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>;
+
+// Keeps track of a sequence of macro expansions.
+//
+// The expanded tokens are accessible via getTokens(), while a map of macro call
+// identifier token to unexpanded token stream is accessible via
+// getUnexpanded().
+class Expansion {
+public:
+ Expansion(TestLexer &Lex, MacroExpander &Macros) : Lex(Lex), Macros(Macros) {}
+
+ // Appends the token stream obtained from expanding the macro Name given
+ // the provided arguments, to be later retrieved with getTokens().
+ // Returns the list of tokens making up the unexpanded macro call.
+ TokenList
+ expand(llvm::StringRef Name,
+ const SmallVector<llvm::SmallVector<FormatToken *, 8>, 1> &Args) {
+ auto *ID = Lex.id(Name);
+ auto UnexpandedLine = std::make_unique<UnwrappedLine>();
+ UnexpandedLine->Tokens.push_back(ID);
+ if (!Args.empty()) {
+ UnexpandedLine->Tokens.push_back(Lex.id("("));
+ for (auto I = Args.begin(), E = Args.end(); I != E; ++I) {
+ if (I != Args.begin())
+ UnexpandedLine->Tokens.push_back(Lex.id(","));
+ UnexpandedLine->Tokens.insert(UnexpandedLine->Tokens.end(), I->begin(),
+ I->end());
+ }
+ UnexpandedLine->Tokens.push_back(Lex.id(")"));
+ }
+ Unexpanded[ID] = std::move(UnexpandedLine);
+
+ auto Expanded = uneof(Macros.expand(ID, Args));
+ Tokens.append(Expanded.begin(), Expanded.end());
+
+ TokenList UnexpandedTokens;
+ for (const UnwrappedLineNode &Node : Unexpanded[ID]->Tokens) {
+ UnexpandedTokens.push_back(Node.Tok);
+ }
+ return UnexpandedTokens;
+ }
+
+ TokenList expand(llvm::StringRef Name,
+ const std::vector<std::string> &Args = {}) {
+ return expand(Name, lexArgs(Args));
+ }
+
+ const UnexpandedMap &getUnexpanded() const { return Unexpanded; }
+
+ const TokenList &getTokens() const { return Tokens; }
+
+private:
+ llvm::SmallVector<TokenList, 1>
+ lexArgs(const std::vector<std::string> &Args) {
+ llvm::SmallVector<TokenList, 1> Result;
+ for (const auto &Arg : Args) {
+ Result.push_back(uneof(Lex.lex(Arg)));
+ }
+ return Result;
+ }
+ llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>> Unexpanded;
+ llvm::SmallVector<FormatToken *, 8> Tokens;
+ TestLexer &Lex;
+ MacroExpander &Macros;
+};
+
+struct Chunk {
+ Chunk(llvm::ArrayRef<FormatToken *> Tokens)
+ : Tokens(Tokens.begin(), Tokens.end()) {}
+ Chunk(llvm::ArrayRef<UnwrappedLine> Children)
+ : Children(Children.begin(), Children.end()) {}
+ llvm::SmallVector<UnwrappedLineNode, 1> Tokens;
+ llvm::SmallVector<UnwrappedLine, 0> Children;
+};
+
+bool tokenMatches(const FormatToken *Left, const FormatToken *Right) {
+ if (Left->getType() == Right->getType() &&
+ Left->TokenText == Right->TokenText)
+ return true;
+ llvm::dbgs() << Left->TokenText << " != " << Right->TokenText << "\n";
+ return false;
+}
+
+// Allows to produce chunks of a token list by typing the code of equal tokens.
+//
+// Created from a list of tokens, users call "consume" to get the next chunk
+// of tokens, checking that they match the written code.
+struct Matcher {
+ Matcher(const TokenList &Tokens, TestLexer &Lex)
+ : Tokens(Tokens), It(this->Tokens.begin()), Lex(Lex) {}
+
+ Chunk consume(StringRef Tokens) {
+ TokenList Result;
+ for (const FormatToken *Token : uneof(Lex.lex(Tokens))) {
+ assert(tokenMatches(*It, Token));
+ Result.push_back(*It);
+ ++It;
+ }
+ return Chunk(Result);
+ }
+
+ TokenList Tokens;
+ TokenList::iterator It;
+ TestLexer &Lex;
+};
+
+UnexpandedMap mergeUnexpanded(const UnexpandedMap &M1,
+ const UnexpandedMap &M2) {
+ UnexpandedMap Result;
+ for (const auto &KV : M1) {
+ Result[KV.first] = std::make_unique<UnwrappedLine>(*KV.second);
+ }
+ for (const auto &KV : M2) {
+ Result[KV.first] = std::make_unique<UnwrappedLine>(*KV.second);
+ }
+ return Result;
+}
+
+class MacroCallReconstructorTest : public ::testing::Test {
+public:
+ MacroCallReconstructorTest() : Lex(Allocator, Buffers) {}
+
+ std::unique_ptr<MacroExpander>
+ createExpander(const std::vector<std::string> &MacroDefinitions) {
+ return std::make_unique<MacroExpander>(MacroDefinitions,
+ Lex.SourceMgr.get(), Lex.Style,
+ Lex.Allocator, Lex.IdentTable);
+ }
+
+ UnwrappedLine line(llvm::ArrayRef<FormatToken *> Tokens) {
+ UnwrappedLine Result;
+ for (FormatToken *Tok : Tokens) {
+ Result.Tokens.push_back(UnwrappedLineNode(Tok));
+ }
+ return Result;
+ }
+
+ UnwrappedLine line(llvm::StringRef Text) { return line({lex(Text)}); }
+
+ UnwrappedLine line(llvm::ArrayRef<Chunk> Chunks) {
+ UnwrappedLine Result;
+ for (const Chunk &Chunk : Chunks) {
+ Result.Tokens.insert(Result.Tokens.end(), Chunk.Tokens.begin(),
+ Chunk.Tokens.end());
+ assert(!Result.Tokens.empty());
+ Result.Tokens.back().Children.append(Chunk.Children.begin(),
+ Chunk.Children.end());
+ }
+ return Result;
+ }
+
+ TokenList lex(llvm::StringRef Text) { return uneof(Lex.lex(Text)); }
+
+ Chunk tokens(llvm::StringRef Text) { return Chunk(lex(Text)); }
+
+ Chunk children(llvm::ArrayRef<UnwrappedLine> Children) {
+ return Chunk(Children);
+ }
+
+ llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
+ std::vector<std::unique_ptr<llvm::MemoryBuffer>> Buffers;
+ TestLexer Lex;
+};
+
+bool matchesTokens(const UnwrappedLine &L1, const UnwrappedLine &L2) {
+ if (L1.Tokens.size() != L2.Tokens.size())
+ return false;
+ for (auto L1It = L1.Tokens.begin(), L2It = L2.Tokens.begin();
+ L1It != L1.Tokens.end(); ++L1It, ++L2It) {
+ if (L1It->Tok != L2It->Tok)
+ return false;
+ if (L1It->Children.size() != L2It->Children.size())
+ return false;
+ for (auto L1ChildIt = L1It->Children.begin(),
+ L2ChildIt = L2It->Children.begin();
+ L1ChildIt != L1It->Children.end(); ++L1ChildIt, ++L2ChildIt) {
+ if (!matchesTokens(*L1ChildIt, *L2ChildIt))
+ return false;
+ }
+ }
+ return true;
+}
+MATCHER_P(matchesLine, line, "") { return matchesTokens(arg, line); }
+
+TEST_F(MacroCallReconstructorTest, Identifier) {
+ auto Macros = createExpander({"X=x"});
+ Expansion Exp(Lex, *Macros);
+ TokenList Call = Exp.expand("X");
+
+ MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
+ Unexp.addLine(line(Exp.getTokens()));
+ EXPECT_TRUE(Unexp.finished());
+ Matcher U(Call, Lex);
+ EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(line(U.consume("X"))));
+}
+
+TEST_F(MacroCallReconstructorTest, NestedLineWithinCall) {
+ auto Macros = createExpander({"C(a)=class X { a; };"});
+ Expansion Exp(Lex, *Macros);
+ TokenList Call = Exp.expand("C", {"void f()"});
+
+ MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
+ Matcher E(Exp.getTokens(), Lex);
+ Unexp.addLine(line(E.consume("class X {")));
+ EXPECT_FALSE(Unexp.finished());
+ Unexp.addLine(line(E.consume("void f();")));
+ EXPECT_FALSE(Unexp.finished());
+ Unexp.addLine(line(E.consume("};")));
+ EXPECT_TRUE(Unexp.finished());
+ Matcher U(Call, Lex);
+ EXPECT_THAT(std::move(Unexp).takeResult(),
+ matchesLine(line(U.consume("C(void f())"))));
+}
+
+TEST_F(MacroCallReconstructorTest, MultipleLinesInNestedMultiParamsExpansion) {
+ auto Macros = createExpander({"C(a, b)=a b", "B(a)={a}"});
+ Expansion Exp1(Lex, *Macros);
+ TokenList Call1 = Exp1.expand("B", {"b"});
+ Expansion Exp2(Lex, *Macros);
+ TokenList Call2 = Exp2.expand("C", {uneof(Lex.lex("a")), Exp1.getTokens()});
+
+ UnexpandedMap Unexpanded =
+ mergeUnexpanded(Exp1.getUnexpanded(), Exp2.getUnexpanded());
+ MacroCallReconstructor Unexp(0, Unexpanded);
+ Matcher E(Exp2.getTokens(), Lex);
+ Unexp.addLine(line(E.consume("a")));
+ EXPECT_FALSE(Unexp.finished());
+ Unexp.addLine(line(E.consume("{")));
+ EXPECT_FALSE(Unexp.finished());
+ Unexp.addLine(line(E.consume("b")));
+ EXPECT_FALSE(Unexp.finished());
+ Unexp.addLine(line(E.consume("}")));
+ EXPECT_TRUE(Unexp.finished());
+
+ Matcher U1(Call1, Lex);
+ auto Middle = U1.consume("B(b)");
+ Matcher U2(Call2, Lex);
+ auto Chunk1 = U2.consume("C(a, ");
+ auto Chunk2 = U2.consume("{ b }");
+ auto Chunk3 = U2.consume(")");
+
+ EXPECT_THAT(std::move(Unexp).takeResult(),
+ matchesLine(line({Chunk1, Middle, Chunk3})));
+}
+
+TEST_F(MacroCallReconstructorTest, StatementSequence) {
+ auto Macros = createExpander({"SEMI=;"});
+ Expansion Exp(Lex, *Macros);
+ TokenList Call1 = Exp.expand("SEMI");
+ TokenList Call2 = Exp.expand("SEMI");
+ TokenList Call3 = Exp.expand("SEMI");
+
+ MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
+ Matcher E(Exp.getTokens(), Lex);
+ Unexp.addLine(line(E.consume(";")));
+ EXPECT_TRUE(Unexp.finished());
+ Unexp.addLine(line(E.consume(";")));
+ EXPECT_TRUE(Unexp.finished());
+ Unexp.addLine(line(E.consume(";")));
+ EXPECT_TRUE(Unexp.finished());
+ Matcher U1(Call1, Lex);
+ Matcher U2(Call2, Lex);
+ Matcher U3(Call3, Lex);
+ EXPECT_THAT(std::move(Unexp).takeResult(),
+ matchesLine(line(
+ {U1.consume("SEMI"),
+ children({line({U2.consume("SEMI"),
+ children({line(U3.consume("SEMI"))})})})})));
+}
+
+TEST_F(MacroCallReconstructorTest, NestedBlock) {
+ auto Macros = createExpander({"ID(x)=x"});
+ // Test: ID({ ID(a *b); })
+ // 1. expand ID(a *b) -> a *b
+ Expansion Exp1(Lex, *Macros);
+ TokenList Call1 = Exp1.expand("ID", {"a *b"});
+ // 2. expand ID({ a *b; })
+ TokenList Arg;
+ Arg.push_back(Lex.id("{"));
+ Arg.append(Exp1.getTokens().begin(), Exp1.getTokens().end());
+ Arg.push_back(Lex.id(";"));
+ Arg.push_back(Lex.id("}"));
+ Expansion Exp2(Lex, *Macros);
+ TokenList Call2 = Exp2.expand("ID", {Arg});
+
+ // Consume as-if formatted:
+ // {
+ // a *b;
+ // }
+ UnexpandedMap Unexpanded =
+ mergeUnexpanded(Exp1.getUnexpanded(), Exp2.getUnexpanded());
+ MacroCallReconstructor Unexp(0, Unexpanded);
+ Matcher E(Exp2.getTokens(), Lex);
+ Unexp.addLine(line(E.consume("{")));
+ EXPECT_FALSE(Unexp.finished());
+ Unexp.addLine(line(E.consume("a *b;")));
+ EXPECT_FALSE(Unexp.finished());
+ Unexp.addLine(line(E.consume("}")));
+ EXPECT_TRUE(Unexp.finished());
+
+ // Expect lines:
+ // ID({
+ // ID(a *b);
+ // })
+ Matcher U1(Call1, Lex);
+ Matcher U2(Call2, Lex);
+ auto Chunk2Start = U2.consume("ID(");
+ auto Chunk2LBrace = U2.consume("{");
+ U2.consume("a *b");
+ auto Chunk2Mid = U2.consume(";");
+ auto Chunk2RBrace = U2.consume("}");
+ auto Chunk2End = U2.consume(")");
+ auto Chunk1 = U1.consume("ID(a *b)");
+
+ auto Expected = line({Chunk2Start,
+ children({
+ line(Chunk2LBrace),
+ line({Chunk1, Chunk2Mid}),
+ line(Chunk2RBrace),
+ }),
+ Chunk2End});
+ EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroCallReconstructorTest, NestedChildBlocks) {
+ auto Macros = createExpander({"ID(x)=x", "CALL(x)=f([] { x })"});
+ // Test: ID(CALL(CALL(return a * b;)))
+ // 1. expand CALL(return a * b;)
+ Expansion Exp1(Lex, *Macros);
+ TokenList Call1 = Exp1.expand("CALL", {"return a * b;"});
+ // 2. expand CALL(f([] { return a * b; }))
+ Expansion Exp2(Lex, *Macros);
+ TokenList Call2 = Exp2.expand("CALL", {Exp1.getTokens()});
+ // 3. expand ID({ f([] { f([] { return a * b; }) }) })
+ TokenList Arg3;
+ Arg3.push_back(Lex.id("{"));
+ Arg3.append(Exp2.getTokens().begin(), Exp2.getTokens().end());
+ Arg3.push_back(Lex.id("}"));
+ Expansion Exp3(Lex, *Macros);
+ TokenList Call3 = Exp3.expand("ID", {Arg3});
+
+ // Consume as-if formatted in three unwrapped lines:
+ // 0: {
+ // 1: f([] {
+ // f([] {
+ // return a * b;
+ // })
+ // })
+ // 2: }
+ UnexpandedMap Unexpanded = mergeUnexpanded(
+ Exp1.getUnexpanded(),
+ mergeUnexpanded(Exp2.getUnexpanded(), Exp3.getUnexpanded()));
+ MacroCallReconstructor Unexp(0, Unexpanded);
+ Matcher E(Exp3.getTokens(), Lex);
+ Unexp.addLine(line(E.consume("{")));
+ Unexp.addLine(
+ line({E.consume("f([] {"),
+ children({line({E.consume("f([] {"),
+ children({line(E.consume("return a * b;"))}),
+ E.consume("})")})}),
+ E.consume("})")}));
+ Unexp.addLine(line(E.consume("}")));
+ EXPECT_TRUE(Unexp.finished());
+
+ // Expect lines:
+ // ID(
+ // {
+ // CALL(CALL(return a * b;))
+ // }
+ // )
+ Matcher U1(Call1, Lex);
+ Matcher U2(Call2, Lex);
+ Matcher U3(Call3, Lex);
+ auto Chunk3Start = U3.consume("ID(");
+ auto Chunk3LBrace = U3.consume("{");
+ U3.consume("f([] { f([] { return a * b; }) })");
+ auto Chunk3RBrace = U3.consume("}");
+ auto Chunk3End = U3.consume(")");
+ auto Chunk2Start = U2.consume("CALL(");
+ U2.consume("f([] { return a * b; })");
+ auto Chunk2End = U2.consume(")");
+ auto Chunk1 = U1.consume("CALL(return a * b;)");
+
+ auto Expected = line({
+ Chunk3Start,
+ children({
+ line(Chunk3LBrace),
+ line({
+ Chunk2Start,
+ Chunk1,
+ Chunk2End,
+ }),
+ line(Chunk3RBrace),
+ }),
+ Chunk3End,
+ });
+ EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroCallReconstructorTest, NestedChildrenMultipleArguments) {
+ auto Macros = createExpander({"CALL(a, b)=f([] { a; b; })"});
+ Expansion Exp(Lex, *Macros);
+ TokenList Call = Exp.expand("CALL", {std::string("int a"), "int b"});
+
+ MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
+ Matcher E(Exp.getTokens(), Lex);
+ Unexp.addLine(line({
+ E.consume("f([] {"),
+ children({
+ line(E.consume("int a;")),
+ line(E.consume("int b;")),
+ }),
+ E.consume("})"),
+ }));
+ EXPECT_TRUE(Unexp.finished());
+ Matcher U(Call, Lex);
+ auto Expected = line(U.consume("CALL(int a, int b)"));
+ EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroCallReconstructorTest, ReverseOrderArgumentsInExpansion) {
+ auto Macros = createExpander({"CALL(a, b)=b + a"});
+ Expansion Exp(Lex, *Macros);
+ TokenList Call = Exp.expand("CALL", {std::string("x"), "y"});
+
+ MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
+ Matcher E(Exp.getTokens(), Lex);
+ Unexp.addLine(line(E.consume("y + x")));
+ EXPECT_TRUE(Unexp.finished());
+ Matcher U(Call, Lex);
+ auto Expected = line(U.consume("CALL(x, y)"));
+ EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroCallReconstructorTest, MultipleToplevelUnwrappedLines) {
+ auto Macros = createExpander({"ID(a, b)=a b"});
+ Expansion Exp(Lex, *Macros);
+ TokenList Call = Exp.expand("ID", {std::string("x; x"), "y"});
+
+ MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
+ Matcher E(Exp.getTokens(), Lex);
+ Unexp.addLine(line(E.consume("x;")));
+ Unexp.addLine(line(E.consume("x y")));
+ EXPECT_TRUE(Unexp.finished());
+ Matcher U(Call, Lex);
+ auto Expected = line({
+ U.consume("ID("),
+ children({
+ line(U.consume("x;")),
+ line(U.consume("x")),
+ }),
+ U.consume(", y)"),
+ });
+ EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroCallReconstructorTest, NestedCallsMultipleLines) {
+ auto Macros = createExpander({"ID(x)=x"});
+ // Test: ID({ID(a * b);})
+ // 1. expand ID(a * b)
+ Expansion Exp1(Lex, *Macros);
+ TokenList Call1 = Exp1.expand("ID", {"a * b"});
+ // 2. expand ID({ a * b; })
+ Expansion Exp2(Lex, *Macros);
+ TokenList Arg2;
+ Arg2.push_back(Lex.id("{"));
+ Arg2.append(Exp1.getTokens().begin(), Exp1.getTokens().end());
+ Arg2.push_back(Lex.id(";"));
+ Arg2.push_back(Lex.id("}"));
+ TokenList Call2 = Exp2.expand("ID", {Arg2});
+
+ // Consume as-if formatted in three unwrapped lines:
+ // 0: {
+ // 1: a * b;
+ // 2: }
+ UnexpandedMap Unexpanded =
+ mergeUnexpanded(Exp1.getUnexpanded(), Exp2.getUnexpanded());
+ MacroCallReconstructor Unexp(0, Unexpanded);
+ Matcher E(Exp2.getTokens(), Lex);
+ Unexp.addLine(line(E.consume("{")));
+ Unexp.addLine(line(E.consume("a * b;")));
+ Unexp.addLine(line(E.consume("}")));
+ EXPECT_TRUE(Unexp.finished());
+
+ // Expect lines:
+ // ID(
+ // {
+ // ID(a * b);
+ // }
+ // )
+ Matcher U1(Call1, Lex);
+ Matcher U2(Call2, Lex);
+ auto Chunk2Start = U2.consume("ID(");
+ auto Chunk2LBrace = U2.consume("{");
+ U2.consume("a * b");
+ auto Chunk2Semi = U2.consume(";");
+ auto Chunk2RBrace = U2.consume("}");
+ auto Chunk2End = U2.consume(")");
+ auto Chunk1 = U1.consume("ID(a * b)");
+
+ auto Expected = line({
+ Chunk2Start,
+ children({
+ line({Chunk2LBrace}),
+ line({Chunk1, Chunk2Semi}),
+ line({Chunk2RBrace}),
+ }),
+ Chunk2End,
+ });
+ EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroCallReconstructorTest, ParentOutsideMacroCall) {
+ auto Macros = createExpander({"ID(a)=a"});
+ Expansion Exp(Lex, *Macros);
+ TokenList Call = Exp.expand("ID", {std::string("x; y; z;")});
+
+ auto Prefix = tokens("int a = []() {");
+ auto Postfix = tokens("}();");
+ MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
+ Matcher E(Exp.getTokens(), Lex);
+ Unexp.addLine(line({
+ Prefix,
+ children({
+ line(E.consume("x;")),
+ line(E.consume("y;")),
+ line(E.consume("z;")),
+ }),
+ Postfix,
+ }));
+ EXPECT_TRUE(Unexp.finished());
+ Matcher U(Call, Lex);
+ auto Expected = line({
+ Prefix,
+ children({
+ line({
+ U.consume("ID("),
+ children({
+ line(U.consume("x;")),
+ line(U.consume("y;")),
+ line(U.consume("z;")),
+ }),
+ U.consume(")"),
+ }),
+ }),
+ Postfix,
+ });
+ EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroCallReconstructorTest, UnusedMacroArguments) {
+ auto Macros = createExpander({"X=x"});
+ Expansion Exp(Lex, *Macros);
+ TokenList Call = Exp.expand("X", {"a", "b", "c"});
+
+ MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
+ Unexp.addLine(line(Exp.getTokens()));
+ EXPECT_TRUE(Unexp.finished());
+ Matcher U(Call, Lex);
+ EXPECT_THAT(std::move(Unexp).takeResult(),
+ matchesLine(line(U.consume("X(a, b, c)"))));
+}
+
+TEST_F(MacroCallReconstructorTest, UnusedEmptyMacroArgument) {
+ auto Macros = createExpander({"X=x"});
+ Expansion Exp(Lex, *Macros);
+ TokenList Call = Exp.expand("X", {std::string("")});
+
+ MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
+ Matcher E(Exp.getTokens(), Lex);
+ auto Semi = tokens(";");
+ Unexp.addLine(line({E.consume("x"), Semi}));
+ EXPECT_TRUE(Unexp.finished());
+ Matcher U(Call, Lex);
+ EXPECT_THAT(std::move(Unexp).takeResult(),
+ matchesLine(line({U.consume("X()"), Semi})));
+}
+
+TEST_F(MacroCallReconstructorTest, ChildrenSplitAcrossArguments) {
+ auto Macros = createExpander({"CALL(a, b)=f([]() a b)"});
+ Expansion Exp(Lex, *Macros);
+ TokenList Call = Exp.expand("CALL", {std::string("{ a;"), "b; }"});
+
+ MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
+ Matcher E(Exp.getTokens(), Lex);
+ Unexp.addLine(line({
+ E.consume("f([]() {"),
+ children({
+ line(E.consume("a;")),
+ line(E.consume("b;")),
+ }),
+ E.consume("})"),
+ }));
+ EXPECT_TRUE(Unexp.finished());
+ Matcher U(Call, Lex);
+ auto Expected = line({
+ U.consume("CALL({"),
+ children(line(U.consume("a;"))),
+ U.consume(", b; })"),
+ });
+ EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroCallReconstructorTest, ChildrenAfterMacroCall) {
+ auto Macros = createExpander({"CALL(a, b)=f([]() a b"});
+ Expansion Exp(Lex, *Macros);
+ TokenList Call = Exp.expand("CALL", {std::string("{ a"), "b"});
+
+ MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
+ Matcher E(Exp.getTokens(), Lex);
+ auto Semi = tokens(";");
+ auto SecondLine = tokens("c d;");
+ auto ThirdLine = tokens("e f;");
+ auto Postfix = tokens("})");
+ Unexp.addLine(line({
+ E.consume("f([]() {"),
+ children({
+ line({E.consume("a b"), Semi}),
+ line(SecondLine),
+ line(ThirdLine),
+ }),
+ Postfix,
+ }));
+ EXPECT_TRUE(Unexp.finished());
+ Matcher U(Call, Lex);
+ auto Expected = line({
+ U.consume("CALL({"),
+ children(line(U.consume("a"))),
+ U.consume(", b)"),
+ Semi,
+ children(line({
+ SecondLine,
+ children(line({
+ ThirdLine,
+ Postfix,
+ })),
+ })),
+ });
+ EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroCallReconstructorTest, InvalidCodeSplittingBracesAcrossArgs) {
+ auto Macros = createExpander({"M(a, b)=(a) (b)"});
+ Expansion Exp(Lex, *Macros);
+ TokenList Call = Exp.expand("M", {std::string("{"), "x", ""});
+
+ MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
+ Matcher E(Exp.getTokens(), Lex);
+ auto Prefix = tokens("({");
+ Unexp.addLine(line({
+ Prefix,
+ children({
+ line({
+ E.consume("({"),
+ children({line(E.consume(")(x)"))}),
+ }),
+ }),
+ }));
+ EXPECT_TRUE(Unexp.finished());
+ Matcher U(Call, Lex);
+ auto Expected = line({
+ Prefix,
+ children({line(U.consume("M({,x,)"))}),
+ });
+ EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
+}
+
+} // namespace
+} // namespace format
+} // namespace clang
More information about the cfe-commits
mailing list