[clang] 0140283 - [clang-format] Add simple macro replacements in formatting.
Manuel Klimek via cfe-commits
cfe-commits at lists.llvm.org
Fri Feb 24 07:49:22 PST 2023
Author: Manuel Klimek
Date: 2023-02-24T15:44:24Z
New Revision: 01402831aaae76e9c61595f9aa81a506b0d926eb
URL: https://github.com/llvm/llvm-project/commit/01402831aaae76e9c61595f9aa81a506b0d926eb
DIFF: https://github.com/llvm/llvm-project/commit/01402831aaae76e9c61595f9aa81a506b0d926eb.diff
LOG: [clang-format] Add simple macro replacements in formatting.
Add configuration to specify macros.
Macros will be expanded, and the code will be parsed and annotated
in the expanded state. In a second step, the formatting decisions
in the annotated expanded code will be reconstructed onto the
original unexpanded macro call.
Eventually, this will allow us to remove special-case code for
various macro options we accumulated over the years in favor of
one principled mechanism.
Differential Revision: https://reviews.llvm.org/D144170
Added:
Modified:
clang/include/clang/Format/Format.h
clang/lib/Format/ContinuationIndenter.cpp
clang/lib/Format/Format.cpp
clang/lib/Format/FormatToken.h
clang/lib/Format/MacroExpander.cpp
clang/lib/Format/Macros.h
clang/lib/Format/TokenAnalyzer.cpp
clang/lib/Format/TokenAnalyzer.h
clang/lib/Format/TokenAnnotator.cpp
clang/lib/Format/TokenAnnotator.h
clang/lib/Format/UnwrappedLineFormatter.cpp
clang/lib/Format/UnwrappedLineParser.cpp
clang/lib/Format/UnwrappedLineParser.h
clang/lib/Format/WhitespaceManager.cpp
clang/unittests/Format/FormatTest.cpp
clang/unittests/Format/MacroCallReconstructorTest.cpp
clang/unittests/Format/MacroExpanderTest.cpp
clang/unittests/Format/TestLexer.h
Removed:
################################################################################
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index 0d57d7996525a..66904a6a11232 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -2745,6 +2745,39 @@ struct FormatStyle {
/// \version 3.7
std::string MacroBlockEnd;
+ /// A list of macros of the form \c <definition>=<expansion> .
+ ///
+ /// Code will be parsed with macros expanded, in order to determine how to
+ /// interpret and format the macro arguments.
+ ///
+ /// For example, the code:
+ /// \code
+ /// A(a*b);
+ /// \endcode
+ /// will usually be interpreted as a call to a function A, and the
+ /// multiplication expression will be formatted as `a * b`.
+ ///
+ /// If we specify the macro definition:
+ /// \code
+ /// Macros:
+ /// - A(x)=x
+ /// \endcode
+ /// the code will now be parsed as a declaration of the variable b of type a*,
+ /// and formatted as `a* b` (depending on pointer-binding rules).
+ ///
+ /// Features and restrictions:
+ /// * Both function-like macros and object-like macros are supported.
+ /// * Macro arguments must be used exactly once in the expansion.
+ /// * No recursive expansion; macros referencing other macros will be
+ /// ignored.
+ /// * Overloading by arity is supported: for example, given the macro
+ /// definitions A=x, A()=y, A(a)=a,
+ /// 'A;' -> 'x;'
+ /// 'A();' -> 'y;'
+ /// 'A(z);' -> 'z;'
+ /// 'A(a, b)' will not be expanded.
+ std::vector<std::string> Macros;
+
/// The maximum number of consecutive empty lines to keep.
/// \code
/// MaxEmptyLinesToKeep: 1 vs. MaxEmptyLinesToKeep: 0
@@ -4306,7 +4339,8 @@ struct FormatStyle {
StatementAttributeLikeMacros == R.StatementAttributeLikeMacros &&
StatementMacros == R.StatementMacros && TabWidth == R.TabWidth &&
TypenameMacros == R.TypenameMacros && UseTab == R.UseTab &&
- WhitespaceSensitiveMacros == R.WhitespaceSensitiveMacros;
+ WhitespaceSensitiveMacros == R.WhitespaceSensitiveMacros &&
+ Macros == R.Macros;
}
std::optional<FormatStyle> GetLanguageStyle(LanguageKind Language) const;
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index c7559874fd4c7..d2f309f3874ed 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -18,6 +18,7 @@
#include "WhitespaceManager.h"
#include "clang/Basic/OperatorPrecedence.h"
#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TokenKinds.h"
#include "clang/Format/Format.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/Debug.h"
@@ -739,9 +740,14 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
if (Previous.is(TT_TemplateString) && Previous.opensScope())
CurrentState.NoLineBreak = true;
+ // Align following lines within parentheses / brackets if configured.
+ // Note: This doesn't apply to macro expansion lines, which are MACRO( , , )
+ // with args as children of the '(' and ',' tokens. It does not make sense to
+ // align the commas with the opening paren.
if (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign &&
!CurrentState.IsCSharpGenericTypeConstraint && Previous.opensScope() &&
Previous.isNot(TT_ObjCMethodExpr) && Previous.isNot(TT_RequiresClause) &&
+ !(Current.MacroParent && Previous.MacroParent) &&
(Current.isNot(TT_LineComment) || Previous.is(BK_BracedInit))) {
CurrentState.Indent = State.Column + Spaces;
CurrentState.IsAligned = true;
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 755b7c6620553..d3062905ad7ee 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -1036,6 +1036,7 @@ template <> struct MappingTraits<FormatStyle> {
IO.mapOptional("UseTab", Style.UseTab);
IO.mapOptional("WhitespaceSensitiveMacros",
Style.WhitespaceSensitiveMacros);
+ IO.mapOptional("Macros", Style.Macros);
// If AlwaysBreakAfterDefinitionReturnType was specified but
// AlwaysBreakAfterReturnType was not, initialize the latter from the
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index 8ecf3cd38f7e1..70ecf7cacccb2 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -377,6 +377,11 @@ struct FormatToken {
/// binary operator.
TokenType getType() const { return Type; }
void setType(TokenType T) {
+ // If this token is a macro argument while formatting an unexpanded macro
+ // call, we do not change its type any more - the type was deduced from
+ // formatting the expanded macro stream already.
+ if (MacroCtx && MacroCtx->Role == MR_UnexpandedArg)
+ return;
assert((!TypeIsFinalized || T == Type) &&
"Please use overwriteFixedType to change a fixed type.");
Type = T;
diff --git a/clang/lib/Format/MacroExpander.cpp b/clang/lib/Format/MacroExpander.cpp
index 9c6bcb8764f40..e03d046114400 100644
--- a/clang/lib/Format/MacroExpander.cpp
+++ b/clang/lib/Format/MacroExpander.cpp
@@ -141,24 +141,44 @@ void MacroExpander::parseDefinition(const std::string &Macro) {
if (!Tokens.empty()) {
DefinitionParser Parser(Tokens);
auto Definition = Parser.parse();
- Definitions[Definition.Name] = std::move(Definition);
+ if (Definition.ObjectLike) {
+ ObjectLike[Definition.Name] = std::move(Definition);
+ } else {
+ FunctionLike[Definition.Name][Definition.Params.size()] =
+ std::move(Definition);
+ }
}
}
bool MacroExpander::defined(llvm::StringRef Name) const {
- return Definitions.find(Name) != Definitions.end();
+ return FunctionLike.find(Name) != FunctionLike.end() ||
+ ObjectLike.find(Name) != ObjectLike.end();
}
bool MacroExpander::objectLike(llvm::StringRef Name) const {
- return Definitions.find(Name)->second.ObjectLike;
+ return ObjectLike.find(Name) != ObjectLike.end();
}
-llvm::SmallVector<FormatToken *, 8> MacroExpander::expand(FormatToken *ID,
- ArgsList Args) const {
- assert(defined(ID->TokenText));
- SmallVector<FormatToken *, 8> Result;
- const Definition &Def = Definitions.find(ID->TokenText)->second;
+bool MacroExpander::hasArity(llvm::StringRef Name, unsigned Arity) const {
+ auto it = FunctionLike.find(Name);
+ return it != FunctionLike.end() &&
+ (it->second.find(Arity) != it->second.end());
+}
+llvm::SmallVector<FormatToken *, 8>
+MacroExpander::expand(FormatToken *ID,
+ std::optional<ArgsList> OptionalArgs) const {
+ if (OptionalArgs)
+ assert(hasArity(ID->TokenText, OptionalArgs->size()));
+ else
+ assert(objectLike(ID->TokenText));
+ const Definition &Def = OptionalArgs
+ ? FunctionLike.find(ID->TokenText)
+ ->second.find(OptionalArgs.value().size())
+ ->second
+ : ObjectLike.find(ID->TokenText)->second;
+ ArgsList Args = OptionalArgs ? OptionalArgs.value() : ArgsList();
+ SmallVector<FormatToken *, 8> Result;
// Expand each argument at most once.
llvm::StringSet<> ExpandedArgs;
diff --git a/clang/lib/Format/Macros.h b/clang/lib/Format/Macros.h
index b26799c20f8c4..1964624e828ce 100644
--- a/clang/lib/Format/Macros.h
+++ b/clang/lib/Format/Macros.h
@@ -106,17 +106,23 @@ class MacroExpander {
IdentifierTable &IdentTable);
~MacroExpander();
- /// Returns whether a macro \p Name is defined.
+ /// Returns whether any macro \p Name is defined, regardless of overloads.
bool defined(llvm::StringRef Name) const;
- /// Returns whether the macro has no arguments and should not consume
- /// subsequent parentheses.
+ /// Returns whether there is an object-like overload, i.e. where the macro
+ /// has no arguments and should not consume subsequent parentheses.
bool objectLike(llvm::StringRef Name) const;
+ /// Returns whether macro \p Name provides an overload with the given arity.
+ bool hasArity(llvm::StringRef Name, unsigned Arity) const;
+
/// Returns the expanded stream of format tokens for \p ID, where
/// each element in \p Args is a positional argument to the macro call.
- llvm::SmallVector<FormatToken *, 8> expand(FormatToken *ID,
- ArgsList Args) const;
+ /// If \p Args is not set, the object-like overload is used.
+ /// If \p Args is set, the overload with the arity equal to \c Args.size() is
+ /// used.
+ llvm::SmallVector<FormatToken *, 8>
+ expand(FormatToken *ID, std::optional<ArgsList> OptionalArgs) const;
private:
struct Definition;
@@ -129,7 +135,8 @@ class MacroExpander {
llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator;
IdentifierTable &IdentTable;
SmallVector<std::unique_ptr<llvm::MemoryBuffer>> Buffers;
- llvm::StringMap<Definition> Definitions;
+ llvm::StringMap<llvm::DenseMap<int, Definition>> FunctionLike;
+ llvm::StringMap<Definition> ObjectLike;
};
/// Converts a sequence of UnwrappedLines containing expanded macros into a
@@ -149,7 +156,7 @@ class MacroExpander {
///
/// After this point, the state of the spelled/expanded stream is "in sync"
/// (both at the start of an UnwrappedLine, with no macros open), so the
-/// Unexpander can be thrown away and parsing can continue.
+/// Reconstructor can be thrown away and parsing can continue.
///
/// Given a mapping from the macro name identifier token in the macro call
/// to the tokens of the macro call, for example:
diff --git a/clang/lib/Format/TokenAnalyzer.cpp b/clang/lib/Format/TokenAnalyzer.cpp
index 77e403581a0d5..bd648c430f9b0 100644
--- a/clang/lib/Format/TokenAnalyzer.cpp
+++ b/clang/lib/Format/TokenAnalyzer.cpp
@@ -104,12 +104,12 @@ TokenAnalyzer::process(bool SkipAnnotation) {
IdentifierTable IdentTable(getFormattingLangOpts(Style));
FormatTokenLexer Lex(Env.getSourceManager(), Env.getFileID(),
Env.getFirstStartColumn(), Style, Encoding, Allocator,
-
IdentTable);
ArrayRef<FormatToken *> Toks(Lex.lex());
SmallVector<FormatToken *, 10> Tokens(Toks.begin(), Toks.end());
- UnwrappedLineParser Parser(Style, Lex.getKeywords(),
- Env.getFirstStartColumn(), Tokens, *this);
+ UnwrappedLineParser Parser(Env.getSourceManager(), Style, Lex.getKeywords(),
+ Env.getFirstStartColumn(), Tokens, *this,
+ Allocator, IdentTable);
Parser.parse();
assert(UnwrappedLines.back().empty());
unsigned Penalty = 0;
diff --git a/clang/lib/Format/TokenAnalyzer.h b/clang/lib/Format/TokenAnalyzer.h
index e5cc1287c6167..4086dab1c94c3 100644
--- a/clang/lib/Format/TokenAnalyzer.h
+++ b/clang/lib/Format/TokenAnalyzer.h
@@ -46,7 +46,7 @@ class Environment {
FileID getFileID() const { return ID; }
- const SourceManager &getSourceManager() const { return SM; }
+ SourceManager &getSourceManager() const { return SM; }
ArrayRef<CharSourceRange> getCharRanges() const { return CharRanges; }
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 101e958321866..fa80588cd3b26 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -2614,6 +2614,13 @@ class ExpressionParser {
// Consume operators with higher precedence.
parse(Precedence + 1);
+ // Do not assign fake parenthesis to tokens that are part of an
+ // unexpanded macro call. The line within the macro call contains
+ // the parenthesis and commas, and we will not find operators within
+ // that structure.
+ if (Current && Current->MacroParent)
+ break;
+
int CurrentPrecedence = getCurrentPrecedence();
if (Precedence == CurrentPrecedence && Current &&
@@ -4389,8 +4396,12 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow)) {
return true;
}
- if (Left.is(tok::comma) && !Right.is(TT_OverloadedOperatorLParen))
+ if (Left.is(tok::comma) && !Right.is(TT_OverloadedOperatorLParen) &&
+ // In an unexpanded macro call we only find the parentheses and commas
+ // in a line; the commas and closing parenthesis do not require a space.
+ (Left.Children.empty() || !Left.MacroParent)) {
return true;
+ }
if (Right.is(tok::comma))
return false;
if (Right.is(TT_ObjCBlockLParen))
diff --git a/clang/lib/Format/TokenAnnotator.h b/clang/lib/Format/TokenAnnotator.h
index 0a6b4f69f38e0..611e95ba11b01 100644
--- a/clang/lib/Format/TokenAnnotator.h
+++ b/clang/lib/Format/TokenAnnotator.h
@@ -65,20 +65,32 @@ class AnnotatedLine {
// left them in a different state.
First->Previous = nullptr;
FormatToken *Current = First;
+ addChildren(Line.Tokens.front(), Current);
for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) {
+ if (Node.Tok->MacroParent)
+ ContainsMacroCall = true;
Current->Next = Node.Tok;
Node.Tok->Previous = Current;
Current = Current->Next;
- Current->Children.clear();
- for (const auto &Child : Node.Children) {
- Children.push_back(new AnnotatedLine(Child));
- Current->Children.push_back(Children.back());
- }
+ addChildren(Node, Current);
+ // FIXME: if we add children, previous will point to the token before
+ // the children; changing this requires significant changes across
+ // clang-format.
}
Last = Current;
Last->Next = nullptr;
}
+ void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) {
+ Current->Children.clear();
+ for (const auto &Child : Node.Children) {
+ Children.push_back(new AnnotatedLine(Child));
+ if (Children.back()->ContainsMacroCall)
+ ContainsMacroCall = true;
+ Current->Children.push_back(Children.back());
+ }
+ }
+
~AnnotatedLine() {
for (AnnotatedLine *Child : Children)
delete Child;
@@ -149,6 +161,9 @@ class AnnotatedLine {
bool MightBeFunctionDecl;
bool IsMultiVariableDeclStmt;
+ /// \c True if this line contains a macro call for which an expansion exists.
+ bool ContainsMacroCall = false;
+
/// \c True if this line should be formatted, i.e. intersects directly or
/// indirectly with one of the input ranges.
bool Affected;
diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp
index 2f3c0897b7a38..2e3441e6caec0 100644
--- a/clang/lib/Format/UnwrappedLineFormatter.cpp
+++ b/clang/lib/Format/UnwrappedLineFormatter.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "UnwrappedLineFormatter.h"
+#include "FormatToken.h"
#include "NamespaceEndCommentsFixer.h"
#include "WhitespaceManager.h"
#include "llvm/Support/Debug.h"
@@ -918,9 +919,22 @@ class LineJoiner {
static void markFinalized(FormatToken *Tok) {
for (; Tok; Tok = Tok->Next) {
- Tok->Finalized = true;
- for (AnnotatedLine *Child : Tok->Children)
- markFinalized(Child->First);
+ if (Tok->MacroCtx && Tok->MacroCtx->Role == MR_ExpandedArg) {
+ // In the first pass we format all macro arguments in the expanded token
+ // stream. Instead of finalizing the macro arguments, we mark that they
+ // will be modified as unexpanded arguments (as part of the macro call
+ // formatting) in the next pass.
+ Tok->MacroCtx->Role = MR_UnexpandedArg;
+ // Reset whether spaces are required before this token, as that is context
+ // dependent, and that context may change when formatting the macro call.
+ // For example, given M(x) -> 2 * x, and the macro call M(var),
+ // the token 'var' will have SpacesRequiredBefore = 1 after being
+ // formatted as part of the expanded macro, but SpacesRequiredBefore = 0
+ // for its position within the macro call.
+ Tok->SpacesRequiredBefore = 0;
+ } else {
+ Tok->Finalized = true;
+ }
}
}
@@ -975,15 +989,15 @@ class LineFormatter {
bool formatChildren(LineState &State, bool NewLine, bool DryRun,
unsigned &Penalty) {
const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
+ bool HasLBrace = LBrace && LBrace->is(tok::l_brace) && LBrace->is(BK_Block);
FormatToken &Previous = *State.NextToken->Previous;
- if (!LBrace || LBrace->isNot(tok::l_brace) || LBrace->isNot(BK_Block) ||
- Previous.Children.size() == 0) {
+ if (Previous.Children.size() == 0 || (!HasLBrace && !LBrace->MacroParent)) {
// The previous token does not open a block. Nothing to do. We don't
// assert so that we can simply call this function for all tokens.
return true;
}
- if (NewLine) {
+ if (NewLine || Previous.MacroParent) {
const ParenState &P = State.Stack.back();
int AdditionalIndent =
@@ -1349,11 +1363,12 @@ unsigned UnwrappedLineFormatter::format(
NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine);
bool FitsIntoOneLine =
- TheLine.Last->TotalLength + Indent <= ColumnLimit ||
- (TheLine.Type == LT_ImportStatement &&
- (!Style.isJavaScript() || !Style.JavaScriptWrapImports)) ||
- (Style.isCSharp() &&
- TheLine.InPPDirective); // don't split #regions in C#
+ !TheLine.ContainsMacroCall &&
+ (TheLine.Last->TotalLength + Indent <= ColumnLimit ||
+ (TheLine.Type == LT_ImportStatement &&
+ (!Style.isJavaScript() || !Style.JavaScriptWrapImports)) ||
+ (Style.isCSharp() &&
+ TheLine.InPPDirective)); // don't split #regions in C#
if (Style.ColumnLimit == 0) {
NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this)
.formatLine(TheLine, NextStartColumn + Indent,
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 52afbc6cff94c..fb7b7be453c1f 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -14,11 +14,15 @@
#include "UnwrappedLineParser.h"
#include "FormatToken.h"
+#include "FormatTokenLexer.h"
#include "FormatTokenSource.h"
+#include "Macros.h"
#include "TokenAnnotator.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -143,11 +147,12 @@ class CompoundStatementIndenter {
unsigned OldLineLevel;
};
-UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
- const AdditionalKeywords &Keywords,
- unsigned FirstStartColumn,
- ArrayRef<FormatToken *> Tokens,
- UnwrappedLineConsumer &Callback)
+UnwrappedLineParser::UnwrappedLineParser(
+ SourceManager &SourceMgr, const FormatStyle &Style,
+ const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
+ ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
+ llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
+ IdentifierTable &IdentTable)
: Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
CurrentLines(&Lines), Style(Style), Keywords(Keywords),
CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
@@ -155,7 +160,8 @@ UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
? IG_Rejected
: IG_Inited),
- IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
+ IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
+ Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {}
void UnwrappedLineParser::reset() {
PPBranchLevel = -1;
@@ -173,6 +179,15 @@ void UnwrappedLineParser::reset() {
NestedTooDeep.clear();
PPStack.clear();
Line->FirstStartColumn = FirstStartColumn;
+
+ if (!Unexpanded.empty())
+ for (FormatToken *Token : AllTokens)
+ Token->MacroCtx.reset();
+ CurrentExpandedLines.clear();
+ ExpandedLines.clear();
+ Unexpanded.clear();
+ InExpansion = false;
+ Reconstruct.reset();
}
void UnwrappedLineParser::parse() {
@@ -196,12 +211,36 @@ void UnwrappedLineParser::parse() {
}
// Create line with eof token.
+ assert(FormatTok->is(tok::eof));
pushToken(FormatTok);
addUnwrappedLine();
- for (const UnwrappedLine &Line : Lines)
- Callback.consumeUnwrappedLine(Line);
+ // In a first run, format everything with the lines containing macro calls
+ // replaced by the expansion.
+ if (!ExpandedLines.empty()) {
+ LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
+ for (const auto &Line : Lines) {
+ if (!Line.Tokens.empty()) {
+ auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
+ if (it != ExpandedLines.end()) {
+ for (const auto &Expanded : it->second) {
+ LLVM_DEBUG(printDebugInfo(Expanded));
+ Callback.consumeUnwrappedLine(Expanded);
+ }
+ continue;
+ }
+ }
+ LLVM_DEBUG(printDebugInfo(Line));
+ Callback.consumeUnwrappedLine(Line);
+ }
+ Callback.finishRun();
+ }
+ LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
+ for (const UnwrappedLine &Line : Lines) {
+ LLVM_DEBUG(printDebugInfo(Line));
+ Callback.consumeUnwrappedLine(Line);
+ }
Callback.finishRun();
Lines.clear();
while (!PPLevelBranchIndex.empty() &&
@@ -724,7 +763,7 @@ FormatToken *UnwrappedLineParser::parseBlock(
parseParens();
size_t NbPreprocessorDirectives =
- CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
+ !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
addUnwrappedLine();
size_t OpeningLineIndex =
CurrentLines->empty()
@@ -4152,12 +4191,25 @@ void UnwrappedLineParser::parseVerilogCaseLabel() {
Line->Level = OrigLevel;
}
+bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const {
+ for (const auto &N : Line.Tokens) {
+ if (N.Tok->MacroCtx)
+ return true;
+ for (const UnwrappedLine &Child : N.Children)
+ if (containsExpansion(Child))
+ return true;
+ }
+ return false;
+}
+
void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
if (Line->Tokens.empty())
return;
LLVM_DEBUG({
- if (CurrentLines == &Lines)
+ if (!parsingPPDirective()) {
+ llvm::dbgs() << "Adding unwrapped line:\n";
printDebugInfo(*Line);
+ }
});
// If this line closes a block when in Whitesmiths mode, remember that
@@ -4168,7 +4220,39 @@ void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
- CurrentLines->push_back(std::move(*Line));
+ // If the current line was expanded from a macro call, we use it to
+ // reconstruct an unwrapped line from the structure of the expanded unwrapped
+ // line and the unexpanded token stream.
+ if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) {
+ if (!Reconstruct)
+ Reconstruct.emplace(Line->Level, Unexpanded);
+ Reconstruct->addLine(*Line);
+
+ // While the reconstructed unexpanded lines are stored in the normal
+ // flow of lines, the expanded lines are stored on the side to be analyzed
+ // in an extra step.
+ CurrentExpandedLines.push_back(std::move(*Line));
+
+ if (Reconstruct->finished()) {
+ UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult();
+ assert(!Reconstructed.Tokens.empty() &&
+ "Reconstructed must at least contain the macro identifier.");
+ assert(!parsingPPDirective());
+ LLVM_DEBUG({
+ llvm::dbgs() << "Adding unexpanded line:\n";
+ printDebugInfo(Reconstructed);
+ });
+ ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines;
+ Lines.push_back(std::move(Reconstructed));
+ CurrentExpandedLines.clear();
+ Reconstruct.reset();
+ }
+ } else {
+ // At the top level we only get here when no unexpansion is going on, or
+ // when conditional formatting led to unfinished macro reconstructions.
+ assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0);
+ CurrentLines->push_back(std::move(*Line));
+ }
Line->Tokens.clear();
Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
Line->FirstStartColumn = 0;
@@ -4176,7 +4260,7 @@ void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
--Line->Level;
- if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
+ if (!parsingPPDirective() && !PreprocessorDirectives.empty()) {
CurrentLines->append(
std::make_move_iterator(PreprocessorDirectives.begin()),
std::make_move_iterator(PreprocessorDirectives.end()));
@@ -4470,6 +4554,87 @@ void UnwrappedLineParser::readToken(int LevelDifference) {
continue;
}
+ if (FormatTok->is(tok::identifier) &&
+ Macros.defined(FormatTok->TokenText) &&
+ // FIXME: Allow expanding macros in preprocessor directives.
+ !Line->InPPDirective) {
+ FormatToken *ID = FormatTok;
+ unsigned Position = Tokens->getPosition();
+
+ // To correctly parse the code, we need to replace the tokens of the macro
+ // call with its expansion.
+ auto PreCall = std::move(Line);
+ Line.reset(new UnwrappedLine);
+ bool OldInExpansion = InExpansion;
+ InExpansion = true;
+ // We parse the macro call into a new line.
+ auto Args = parseMacroCall();
+ InExpansion = OldInExpansion;
+ assert(Line->Tokens.front().Tok == ID);
+ // And remember the unexpanded macro call tokens.
+ auto UnexpandedLine = std::move(Line);
+ // Reset to the old line.
+ Line = std::move(PreCall);
+
+ LLVM_DEBUG({
+ llvm::dbgs() << "Macro call: " << ID->TokenText << "(";
+ if (Args) {
+ llvm::dbgs() << "(";
+ for (const auto &Arg : Args.value())
+ for (const auto &T : Arg)
+ llvm::dbgs() << T->TokenText << " ";
+ llvm::dbgs() << ")";
+ }
+ llvm::dbgs() << "\n";
+ });
+ if (Macros.objectLike(ID->TokenText) && Args &&
+ !Macros.hasArity(ID->TokenText, Args->size())) {
+ // The macro is either
+ // - object-like, but we got arguments, or
+ // - overloaded to be both object-like and function-like, but none of
+ // the function-like arities match the number of arguments.
+ // Thus, expand as object-like macro.
+ LLVM_DEBUG(llvm::dbgs()
+ << "Macro \"" << ID->TokenText
+ << "\" not overloaded for arity " << Args->size()
+ << "or not function-like, using object-like overload.");
+ Args.reset();
+ UnexpandedLine->Tokens.resize(1);
+ Tokens->setPosition(Position);
+ nextToken();
+ assert(!Args && Macros.objectLike(ID->TokenText));
+ }
+ if ((!Args && Macros.objectLike(ID->TokenText)) ||
+ (Args && Macros.hasArity(ID->TokenText, Args->size()))) {
+ // Next, we insert the expanded tokens in the token stream at the
+ // current position, and continue parsing.
+ Unexpanded[ID] = std::move(UnexpandedLine);
+ SmallVector<FormatToken *, 8> Expansion =
+ Macros.expand(ID, std::move(Args));
+ if (!Expansion.empty())
+ FormatTok = Tokens->insertTokens(Expansion);
+
+ LLVM_DEBUG({
+ llvm::dbgs() << "Expanded: ";
+ for (const auto &T : Expansion)
+ llvm::dbgs() << T->TokenText << " ";
+ llvm::dbgs() << "\n";
+ });
+ } else {
+ LLVM_DEBUG({
+ llvm::dbgs() << "Did not expand macro \"" << ID->TokenText
+ << "\", because it was used ";
+ if (Args)
+ llvm::dbgs() << "with " << Args->size();
+ else
+ llvm::dbgs() << "without";
+ llvm::dbgs() << " arguments, which doesn't match any definition.\n";
+ });
+ Tokens->setPosition(Position);
+ FormatTok = ID;
+ }
+ }
+
if (!FormatTok->is(tok::comment)) {
distributeComments(Comments, FormatTok);
Comments.clear();
@@ -4483,6 +4648,71 @@ void UnwrappedLineParser::readToken(int LevelDifference) {
Comments.clear();
}
+namespace {
+template <typename Iterator>
+void pushTokens(Iterator Begin, Iterator End,
+ llvm::SmallVectorImpl<FormatToken *> &Into) {
+ for (auto I = Begin; I != End; ++I) {
+ Into.push_back(I->Tok);
+ for (const auto &Child : I->Children)
+ pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into);
+ }
+}
+} // namespace
+
+std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
+UnwrappedLineParser::parseMacroCall() {
+ std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args;
+ assert(Line->Tokens.empty());
+ nextToken();
+ if (!FormatTok->is(tok::l_paren))
+ return Args;
+ unsigned Position = Tokens->getPosition();
+ FormatToken *Tok = FormatTok;
+ nextToken();
+ Args.emplace();
+ auto ArgStart = std::prev(Line->Tokens.end());
+
+ int Parens = 0;
+ do {
+ switch (FormatTok->Tok.getKind()) {
+ case tok::l_paren:
+ ++Parens;
+ nextToken();
+ break;
+ case tok::r_paren: {
+ if (Parens > 0) {
+ --Parens;
+ nextToken();
+ break;
+ }
+ Args->push_back({});
+ pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
+ nextToken();
+ return Args;
+ }
+ case tok::comma: {
+ if (Parens > 0) {
+ nextToken();
+ break;
+ }
+ Args->push_back({});
+ pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back());
+ nextToken();
+ ArgStart = std::prev(Line->Tokens.end());
+ break;
+ }
+ default:
+ nextToken();
+ break;
+ }
+ } while (!eof());
+ Line->Tokens.resize(1);
+ Tokens->setPosition(Position);
+ FormatTok = Tok;
+ return {};
+}
+
void UnwrappedLineParser::pushToken(FormatToken *Tok) {
Line->Tokens.push_back(UnwrappedLineNode(Tok));
if (MustBreakBeforeNextToken) {
diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h
index 77277471f7e3c..20ea2956058b9 100644
--- a/clang/lib/Format/UnwrappedLineParser.h
+++ b/clang/lib/Format/UnwrappedLineParser.h
@@ -15,10 +15,14 @@
#ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
#define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
+#include "Encoding.h"
#include "FormatToken.h"
+#include "Macros.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Format/Format.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/Support/Regex.h"
#include <list>
#include <stack>
@@ -76,6 +80,19 @@ struct UnwrappedLine {
unsigned FirstStartColumn = 0;
};
+/// Interface for users of the UnwrappedLineParser to receive the parsed lines.
+/// Parsing a single snippet of code can lead to multiple runs, where each
+/// run is a coherent view of the file.
+///
+/// For example, different runs are generated:
+/// - for different combinations of #if blocks
+/// - when macros are involved, for the expanded code and the as-written code
+///
+/// Some tokens will only be visible in a subset of the runs.
+/// For each run, \c UnwrappedLineParser will call \c consumeUnwrappedLine
+/// for each parsed unwrapped line, and then \c finishRun to indicate
+/// that the set of unwrapped lines before is one coherent view of the
+/// code snippet to be formatted.
class UnwrappedLineConsumer {
public:
virtual ~UnwrappedLineConsumer() {}
@@ -87,10 +104,12 @@ class FormatTokenSource;
class UnwrappedLineParser {
public:
- UnwrappedLineParser(const FormatStyle &Style,
+ UnwrappedLineParser(SourceManager &SourceMgr, const FormatStyle &Style,
const AdditionalKeywords &Keywords,
unsigned FirstStartColumn, ArrayRef<FormatToken *> Tokens,
- UnwrappedLineConsumer &Callback);
+ UnwrappedLineConsumer &Callback,
+ llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
+ IdentifierTable &IdentTable);
void parse();
@@ -193,6 +212,8 @@ class UnwrappedLineParser {
unsigned parseVerilogHierarchyHeader();
void parseVerilogTable();
void parseVerilogCaseLabel();
+ std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
+ parseMacroCall();
// Used by addUnwrappedLine to denote whether to keep or remove a level
// when resetting the line state.
@@ -236,16 +257,49 @@ class UnwrappedLineParser {
bool isOnNewLine(const FormatToken &FormatTok);
+ // Returns whether there is a macro expansion in the line, i.e. a token that
+ // was expanded from a macro call.
+ bool containsExpansion(const UnwrappedLine &Line) const;
+
// Compute hash of the current preprocessor branch.
// This is used to identify the different branches, and thus track if block
// open and close in the same branch.
size_t computePPHash() const;
+ bool parsingPPDirective() const { return CurrentLines != &Lines; }
+
// FIXME: We are constantly running into bugs where Line.Level is incorrectly
// subtracted from beyond 0. Introduce a method to subtract from Line.Level
// and use that everywhere in the Parser.
std::unique_ptr<UnwrappedLine> Line;
+ // Lines that are created by macro expansion.
+ // When formatting code containing macro calls, we first format the expanded
+ // lines to set the token types correctly. Afterwards, we format the
+ // reconstructed macro calls, re-using the token types determined in the first
+ // step.
+ // ExpandedLines will be reset every time we create a new LineAndExpansion
+ // instance once a line containing macro calls has been parsed.
+ SmallVector<UnwrappedLine, 8> CurrentExpandedLines;
+
+ // Maps from the first token of a top-level UnwrappedLine that contains
+ // a macro call to the replacement UnwrappedLines expanded from the macro
+ // call.
+ llvm::DenseMap<FormatToken *, SmallVector<UnwrappedLine, 8>> ExpandedLines;
+
+ // Map from the macro identifier to a line containing the full unexpanded
+ // macro call.
+ llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>> Unexpanded;
+
+ // For recursive macro expansions, trigger reconstruction only on the
+ // outermost expansion.
+ bool InExpansion = false;
+
+ // Set while we reconstruct a macro call.
+ // For reconstruction, we feed the expanded lines into the reconstructor
+ // until it is finished.
+ std::optional<MacroCallReconstructor> Reconstruct;
+
// Comments are sorted into unwrapped lines by whether they are in the same
// line as the previous token, or not. If not, they belong to the next token.
// Since the next token might already be in a new unwrapped line, we need to
@@ -345,13 +399,17 @@ class UnwrappedLineParser {
// does not start at the beginning of the file.
unsigned FirstStartColumn;
+ MacroExpander Macros;
+
friend class ScopedLineState;
friend class CompoundStatementIndenter;
};
struct UnwrappedLineNode {
UnwrappedLineNode() : Tok(nullptr) {}
- UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {}
+ UnwrappedLineNode(FormatToken *Tok,
+ llvm::ArrayRef<UnwrappedLine> Children = {})
+ : Tok(Tok), Children(Children.begin(), Children.end()) {}
FormatToken *Tok;
SmallVector<UnwrappedLine, 0> Children;
diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp
index fa55ac55da03e..d2be3b3a77df4 100644
--- a/clang/lib/Format/WhitespaceManager.cpp
+++ b/clang/lib/Format/WhitespaceManager.cpp
@@ -49,7 +49,7 @@ void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines,
unsigned Spaces,
unsigned StartOfTokenColumn,
bool IsAligned, bool InPPDirective) {
- if (Tok.Finalized)
+ if (Tok.Finalized || (Tok.MacroCtx && Tok.MacroCtx->Role == MR_ExpandedArg))
return;
Tok.setDecision((Newlines > 0) ? FD_Break : FD_Continue);
Changes.push_back(Change(Tok, /*CreateReplacement=*/true, Tok.WhitespaceRange,
@@ -60,7 +60,7 @@ void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines,
void WhitespaceManager::addUntouchableToken(const FormatToken &Tok,
bool InPPDirective) {
- if (Tok.Finalized)
+ if (Tok.Finalized || (Tok.MacroCtx && Tok.MacroCtx->Role == MR_ExpandedArg))
return;
Changes.push_back(Change(Tok, /*CreateReplacement=*/false,
Tok.WhitespaceRange, /*Spaces=*/0,
@@ -84,7 +84,7 @@ void WhitespaceManager::replaceWhitespaceInToken(
const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars,
StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective,
unsigned Newlines, int Spaces) {
- if (Tok.Finalized)
+ if (Tok.Finalized || (Tok.MacroCtx && Tok.MacroCtx->Role == MR_ExpandedArg))
return;
SourceLocation Start = Tok.getStartOfNonWhitespace().getLocWithOffset(Offset);
Changes.push_back(
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index cefc4a536a67f..97a0bfae2701f 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -22576,6 +22576,244 @@ TEST_F(FormatTest, MergeLessLessAtEnd) {
"aaaallvm::outs()\n <<");
}
+TEST_F(FormatTest, UnexpandConfiguredMacros) {
+ FormatStyle Style = getLLVMStyle();
+ Style.Macros.push_back("CLASS=class C {");
+ Style.Macros.push_back("SEMI=;");
+ Style.Macros.push_back("STMT=f();");
+ Style.Macros.push_back("ID(x)=x");
+ Style.Macros.push_back("ID3(x, y, z)=x y z");
+ Style.Macros.push_back("CALL(x)=f([] { x })");
+ Style.Macros.push_back("ASSIGN_OR_RETURN(a, b)=a = (b)");
+ Style.Macros.push_back("ASSIGN_OR_RETURN(a, b, c)=a = (b); if (x) return c");
+ Style.Macros.push_back("MOCK_METHOD(r, n, a, s)=r n a s");
+
+ verifyFormat("ID(nested(a(b, c), d))", Style);
+ verifyFormat("CLASS\n"
+ " a *b;\n"
+ "};",
+ Style);
+ verifyFormat("SEMI\n"
+ "SEMI\n"
+ "SEMI",
+ Style);
+ verifyFormat("STMT\n"
+ "STMT\n"
+ "STMT",
+ Style);
+ verifyFormat("void f() { ID(a *b); }", Style);
+ verifyFormat(R"(ID(
+ { ID(a *b); });
+)",
+ Style);
+ verifyIncompleteFormat(R"(ID3({, ID(a *b),
+ ;
+ });
+)",
+ Style);
+
+ verifyFormat("ID(CALL(CALL(return a * b;)));", Style);
+
+ verifyFormat("ASSIGN_OR_RETURN(MySomewhatLongType *variable,\n"
+ " MySomewhatLongFunction(SomethingElse()));\n",
+ Style);
+ verifyFormat("ASSIGN_OR_RETURN(MySomewhatLongType *variable,\n"
+ " MySomewhatLongFunction(SomethingElse()), "
+ "ReturnMe());\n",
+ Style);
+
+ verifyFormat(R"(
+#define MACRO(a, b) ID(a + b)
+)",
+ Style);
+ EXPECT_EQ(R"(
+int a;
+int b;
+int c;
+int d;
+int e;
+int f;
+ID(
+ namespace foo {
+ int a;
+ }
+) // namespace k
+)",
+ format(R"(
+int a;
+int b;
+int c;
+int d;
+int e;
+int f;
+ID(namespace foo { int a; }) // namespace k
+)",
+ Style));
+ verifyFormat(R"(ID(
+ //
+ ({ ; }))
+)",
+ Style);
+
+ Style.ColumnLimit = 35;
+ // FIXME: Arbitrary formatting of macros where the end of the logical
+ // line is in the middle of a macro call are not working yet.
+ verifyFormat(R"(ID(
+ void f();
+ void)
+ID(g) ID(()) ID(
+ ;
+ void g();)
+)",
+ Style);
+
+ Style.ColumnLimit = 10;
+ verifyFormat("STMT\n"
+ "STMT\n"
+ "STMT",
+ Style);
+
+ EXPECT_EQ(R"(
+ID(CALL(CALL(
+ a *b)));
+)",
+ format(R"(
+ID(CALL(CALL(a * b)));
+)",
+ Style));
+
+ // FIXME: If we want to support unbalanced braces or parens from macro
+ // expansions we need to re-think how we propagate errors in
+ // TokenAnnotator::parseLine; for investigation, switching the inner loop of
+ // TokenAnnotator::parseLine to return LT_Other instead of LT_Invalid in case
+ // of !consumeToken() changes the formatting of the test below and makes it
+ // believe it has a fully correct formatting.
+ EXPECT_EQ(R"(
+ID3(
+ {
+ CLASS
+ a *b;
+ };
+ },
+ ID(x *y);
+ ,
+ STMT
+ STMT
+ STMT)
+void f();
+)",
+ format(R"(
+ID3({CLASS a*b; };}, ID(x*y);, STMT STMT STMT)
+void f();
+)",
+ Style));
+
+ verifyFormat("ID(a(\n"
+ "#ifdef A\n"
+ " b, c\n"
+ "#else\n"
+ " d(e)\n"
+ "#endif\n"
+ " ))",
+ Style);
+ Style.ColumnLimit = 80;
+ verifyFormat(R"(ASSIGN_OR_RETURN(
+ // Comment
+ a b, c);
+)",
+ Style);
+ Style.ColumnLimit = 30;
+ verifyFormat(R"(ASSIGN_OR_RETURN(
+ // Comment
+ //
+ a b,
+ xxxxxxxxxxxx(
+ yyyyyyyyyyyyyyyyy,
+ zzzzzzzzzzzzzzzzzz),
+ f([]() {
+ a();
+ b();
+ }));
+)",
+ Style);
+ verifyFormat(R"(int a = []() {
+ ID(
+ x;
+ y;
+ z;)
+ ;
+}();
+)",
+ Style);
+ EXPECT_EQ(
+ R"(ASSIGN_OR_RETURN((
+====
+#))
+})",
+ format(R"(ASSIGN_OR_RETURN((
+====
+#))
+})",
+ Style, SC_ExpectIncomplete));
+ EXPECT_EQ(R"(ASSIGN_OR_RETURN(
+}
+(
+====
+#),
+ a))",
+ format(R"(ASSIGN_OR_RETURN(
+}
+(
+====
+#),
+a))",
+ Style, SC_ExpectIncomplete));
+ EXPECT_EQ(R"(ASSIGN_OR_RETURN(a
+//
+====
+#
+ <))",
+ format(R"(ASSIGN_OR_RETURN(a
+//
+====
+#
+ <))",
+ Style));
+ verifyFormat("class C {\n"
+ " MOCK_METHOD(R, f,\n"
+ " (a *b, c *d),\n"
+ " (override));\n"
+ "};",
+ Style);
+}
+
+TEST_F(FormatTest, KeepParensWhenExpandingObjectLikeMacros) {
+ FormatStyle Style = getLLVMStyle();
+ Style.Macros.push_back("FN=class C { int f");
+ verifyFormat("void f() {\n"
+ " FN(a *b);\n"
+ " };\n"
+ "}",
+ Style);
+}
+
+TEST_F(FormatTest, DoesNotExpandFunctionLikeMacrosWithoutParens) {
+ FormatStyle Style = getLLVMStyle();
+ Style.Macros.push_back("CLASS()=class C {");
+ verifyFormat("CLASS void f();\n"
+ "}\n"
+ ";",
+ Style);
+}
+
+TEST_F(FormatTest, ContinueFormattingAfterUnclosedParensAfterObjectLikeMacro) {
+ FormatStyle Style = getLLVMStyle();
+ Style.Macros.push_back("O=class {");
+ verifyIncompleteFormat("O(auto x = [](){\n"
+ " f();}",
+ Style);
+}
+
TEST_F(FormatTest, HandleUnbalancedImplicitBracesAcrossPPBranches) {
std::string code = "#if A\n"
"#if B\n"
diff --git a/clang/unittests/Format/MacroCallReconstructorTest.cpp b/clang/unittests/Format/MacroCallReconstructorTest.cpp
index eee980bae8d8b..6e6900577d165 100644
--- a/clang/unittests/Format/MacroCallReconstructorTest.cpp
+++ b/clang/unittests/Format/MacroCallReconstructorTest.cpp
@@ -33,13 +33,31 @@ class Expansion {
TokenList
expand(llvm::StringRef Name,
const SmallVector<llvm::SmallVector<FormatToken *, 8>, 1> &Args) {
+ return expandInternal(Name, Args);
+ }
+
+ TokenList expand(llvm::StringRef Name) { return expandInternal(Name, {}); }
+
+ TokenList expand(llvm::StringRef Name, const std::vector<std::string> &Args) {
+ return expandInternal(Name, lexArgs(Args));
+ }
+
+ const UnexpandedMap &getUnexpanded() const { return Unexpanded; }
+
+ const TokenList &getTokens() const { return Tokens; }
+
+private:
+ TokenList expandInternal(
+ llvm::StringRef Name,
+ const std::optional<SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>>
+ &Args) {
auto *ID = Lex.id(Name);
auto UnexpandedLine = std::make_unique<UnwrappedLine>();
UnexpandedLine->Tokens.push_back(ID);
- if (!Args.empty()) {
+ if (Args && !Args->empty()) {
UnexpandedLine->Tokens.push_back(Lex.id("("));
- for (auto I = Args.begin(), E = Args.end(); I != E; ++I) {
- if (I != Args.begin())
+ for (auto I = Args->begin(), E = Args->end(); I != E; ++I) {
+ if (I != Args->begin())
UnexpandedLine->Tokens.push_back(Lex.id(","));
UnexpandedLine->Tokens.insert(UnexpandedLine->Tokens.end(), I->begin(),
I->end());
@@ -57,16 +75,6 @@ class Expansion {
return UnexpandedTokens;
}
- TokenList expand(llvm::StringRef Name,
- const std::vector<std::string> &Args = {}) {
- return expand(Name, lexArgs(Args));
- }
-
- const UnexpandedMap &getUnexpanded() const { return Unexpanded; }
-
- const TokenList &getTokens() const { return Tokens; }
-
-private:
llvm::SmallVector<TokenList, 1>
lexArgs(const std::vector<std::string> &Args) {
llvm::SmallVector<TokenList, 1> Result;
@@ -563,34 +571,6 @@ TEST_F(MacroCallReconstructorTest, ParentOutsideMacroCall) {
EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected));
}
-TEST_F(MacroCallReconstructorTest, UnusedMacroArguments) {
- auto Macros = createExpander({"X=x"});
- Expansion Exp(Lex, *Macros);
- TokenList Call = Exp.expand("X", {"a", "b", "c"});
-
- MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
- Unexp.addLine(line(Exp.getTokens()));
- EXPECT_TRUE(Unexp.finished());
- Matcher U(Call, Lex);
- EXPECT_THAT(std::move(Unexp).takeResult(),
- matchesLine(line(U.consume("X(a, b, c)"))));
-}
-
-TEST_F(MacroCallReconstructorTest, UnusedEmptyMacroArgument) {
- auto Macros = createExpander({"X=x"});
- Expansion Exp(Lex, *Macros);
- TokenList Call = Exp.expand("X", {std::string("")});
-
- MacroCallReconstructor Unexp(0, Exp.getUnexpanded());
- Matcher E(Exp.getTokens(), Lex);
- auto Semi = tokens(";");
- Unexp.addLine(line({E.consume("x"), Semi}));
- EXPECT_TRUE(Unexp.finished());
- Matcher U(Call, Lex);
- EXPECT_THAT(std::move(Unexp).takeResult(),
- matchesLine(line({U.consume("X()"), Semi})));
-}
-
TEST_F(MacroCallReconstructorTest, ChildrenSplitAcrossArguments) {
auto Macros = createExpander({"CALL(a, b)=f([]() a b)"});
Expansion Exp(Lex, *Macros);
@@ -655,7 +635,7 @@ TEST_F(MacroCallReconstructorTest, ChildrenAfterMacroCall) {
}
TEST_F(MacroCallReconstructorTest, InvalidCodeSplittingBracesAcrossArgs) {
- auto Macros = createExpander({"M(a, b)=(a) (b)"});
+ auto Macros = createExpander({"M(a, b, c)=(a) (b) c"});
Expansion Exp(Lex, *Macros);
TokenList Call = Exp.expand("M", {std::string("{"), "x", ""});
diff --git a/clang/unittests/Format/MacroExpanderTest.cpp b/clang/unittests/Format/MacroExpanderTest.cpp
index 37fa8d1cfc179..72302aa0cea7e 100644
--- a/clang/unittests/Format/MacroExpanderTest.cpp
+++ b/clang/unittests/Format/MacroExpanderTest.cpp
@@ -19,9 +19,16 @@ class MacroExpanderTest : public ::testing::Test {
Lex.Allocator, Lex.IdentTable);
}
+ std::string expand(MacroExpander &Macros, llvm::StringRef Name) {
+ EXPECT_TRUE(Macros.defined(Name))
+ << "Macro not defined: \"" << Name << "\"";
+ return text(Macros.expand(Lex.id(Name), {}));
+ }
+
std::string expand(MacroExpander &Macros, llvm::StringRef Name,
- const std::vector<std::string> &Args = {}) {
- EXPECT_TRUE(Macros.defined(Name));
+ const std::vector<std::string> &Args) {
+ EXPECT_TRUE(Macros.defined(Name))
+ << "Macro not defined: \"" << Name << "\"";
return text(Macros.expand(Lex.id(Name), lexArgs(Args)));
}
@@ -95,7 +102,7 @@ TEST_F(MacroExpanderTest, ExpandsWithoutArguments) {
EXPECT_EQ("", expand(*Macros, "A"));
EXPECT_EQ("b", expand(*Macros, "B"));
EXPECT_EQ("c+c", expand(*Macros, "C"));
- EXPECT_EQ("", expand(*Macros, "D"));
+ EXPECT_EQ("", expand(*Macros, "D", {}));
}
TEST_F(MacroExpanderTest, ExpandsWithArguments) {
@@ -105,7 +112,6 @@ TEST_F(MacroExpanderTest, ExpandsWithArguments) {
});
EXPECT_EQ("", expand(*Macros, "A", {"a"}));
EXPECT_EQ("b1+b2+b3", expand(*Macros, "B", {"b1", "b2 + b3"}));
- EXPECT_EQ("x+", expand(*Macros, "B", {"x"}));
}
TEST_F(MacroExpanderTest, AttributizesTokens) {
@@ -200,6 +206,14 @@ TEST_F(MacroExpanderTest, UnderstandsCppTokens) {
EXPECT_ATTRIBUTES(Result, Attributes);
}
+TEST_F(MacroExpanderTest, Overloads) {
+ auto Macros = create({"A=x", "A()=y", "A(a)=a", "A(a, b)=a b"});
+ EXPECT_EQ("x", expand(*Macros, "A"));
+ EXPECT_EQ("y", expand(*Macros, "A", {}));
+ EXPECT_EQ("z", expand(*Macros, "A", {"z"}));
+ EXPECT_EQ("xy", expand(*Macros, "A", {"x", "y"}));
+}
+
} // namespace
} // namespace format
} // namespace clang
diff --git a/clang/unittests/Format/TestLexer.h b/clang/unittests/Format/TestLexer.h
index a1585fc5cac6d..8b5949b32fc9e 100644
--- a/clang/unittests/Format/TestLexer.h
+++ b/clang/unittests/Format/TestLexer.h
@@ -72,7 +72,8 @@ class TestLexer : public UnwrappedLineConsumer {
TokenList annotate(llvm::StringRef Code) {
FormatTokenLexer Lex = getNewLexer(Code);
auto Tokens = Lex.lex();
- UnwrappedLineParser Parser(Style, Lex.getKeywords(), 0, Tokens, *this);
+ UnwrappedLineParser Parser(SourceMgr.get(), Style, Lex.getKeywords(), 0,
+ Tokens, *this, Allocator, IdentTable);
Parser.parse();
TokenAnnotator Annotator(Style, Lex.getKeywords());
for (auto &Line : UnwrappedLines) {
More information about the cfe-commits
mailing list