[clang] e336b74 - [clang-format] Add a MacroExpander.

Manuel Klimek via cfe-commits cfe-commits at lists.llvm.org
Fri Sep 25 05:09:14 PDT 2020


Author: Manuel Klimek
Date: 2020-09-25T14:08:13+02:00
New Revision: e336b74c995d665bc3fb75164375bbb0f78f516c

URL: https://github.com/llvm/llvm-project/commit/e336b74c995d665bc3fb75164375bbb0f78f516c
DIFF: https://github.com/llvm/llvm-project/commit/e336b74c995d665bc3fb75164375bbb0f78f516c.diff

LOG: [clang-format] Add a MacroExpander.

Summary:
The MacroExpander allows expanding simple (non-recursive) macro
definitions from a macro identifier token and macro arguments. It
annotates the tokens with a newly introduced MacroContext that keeps
track of the role a token played in expanding the macro in order to
be able to reconstruct the macro expansion from an expanded (formatted)
token stream.

Made Token explicitly copy-able to enable copying tokens from the parsed
macro definition.

Reviewers: sammccall

Subscribers: mgorny, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D83296

Added: 
    clang/lib/Format/MacroExpander.cpp
    clang/lib/Format/Macros.h
    clang/unittests/Format/MacroExpanderTest.cpp
    clang/unittests/Format/TestLexer.h

Modified: 
    clang/lib/Format/CMakeLists.txt
    clang/lib/Format/FormatToken.h
    clang/unittests/Format/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/clang/lib/Format/CMakeLists.txt b/clang/lib/Format/CMakeLists.txt
index 0019d045cd06..ec1522db7e87 100644
--- a/clang/lib/Format/CMakeLists.txt
+++ b/clang/lib/Format/CMakeLists.txt
@@ -7,6 +7,7 @@ add_clang_library(clangFormat
   Format.cpp
   FormatToken.cpp
   FormatTokenLexer.cpp
+  MacroExpander.cpp
   NamespaceEndCommentsFixer.cpp
   SortJavaScriptImports.cpp
   TokenAnalyzer.cpp

diff  --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index 76ef99e72d58..c6af71a768a1 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -136,6 +136,68 @@ enum ParameterPackingKind { PPK_BinPacked, PPK_OnePerLine, PPK_Inconclusive };
 
 enum FormatDecision { FD_Unformatted, FD_Continue, FD_Break };
 
+/// Roles a token can take in a configured macro expansion.
+enum MacroRole {
+  /// The token was expanded from a macro argument when formatting the expanded
+  /// token sequence.
+  MR_ExpandedArg,
+  /// The token is part of a macro argument that was previously formatted as
+  /// expansion when formatting the unexpanded macro call.
+  MR_UnexpandedArg,
+  /// The token was expanded from a macro definition, and is not visible as part
+  /// of the macro call.
+  MR_Hidden,
+};
+
+struct FormatToken;
+
+/// Contains information on the token's role in a macro expansion.
+///
+/// Given the following definitions:
+/// A(X) = [ X ]
+/// B(X) = < X >
+/// C(X) = X
+///
+/// Consider the macro call:
+/// A({B(C(C(x)))}) -> [{<x>}]
+///
+/// In this case, the tokens of the unexpanded macro call will have the
+/// following relevant entries in their macro context (note that formatting
+/// the unexpanded macro call happens *after* formatting the expanded macro
+/// call):
+///                   A( { B( C( C(x) ) ) } )
+/// Role:             NN U NN NN NNUN N N U N  (N=None, U=UnexpandedArg)
+///
+///                   [  { <       x    > } ]
+/// Role:             H  E H       E    H E H  (H=Hidden, E=ExpandedArg)
+/// ExpandedFrom[0]:  A  A A       A    A A A
+/// ExpandedFrom[1]:       B       B    B
+/// ExpandedFrom[2]:               C
+/// ExpandedFrom[3]:               C
+/// StartOfExpansion: 1  0 1       2    0 0 0
+/// EndOfExpansion:   0  0 0       2    1 0 1
+struct MacroExpansion {
+  MacroExpansion(MacroRole Role) : Role(Role) {}
+
+  /// The token's role in the macro expansion.
+  /// When formatting an expanded macro, all tokens that are part of macro
+  /// arguments will be MR_ExpandedArg, while all tokens that are not visible in
+  /// the macro call will be MR_Hidden.
+  /// When formatting an unexpanded macro call, all tokens that are part of
+  /// macro arguments will be MR_UnexpandedArg.
+  MacroRole Role;
+
+  /// The stack of macro call identifier tokens this token was expanded from.
+  llvm::SmallVector<FormatToken *, 1> ExpandedFrom;
+
+  /// The number of expansions of which this token is the first entry.
+  unsigned StartOfExpansion = 0;
+
+  /// The number of currently open expansions in \c ExpandedFrom this token is
+  /// the last token in.
+  unsigned EndOfExpansion = 0;
+};
+
 class TokenRole;
 class AnnotatedLine;
 
@@ -163,7 +225,9 @@ struct FormatToken {
 
   /// A token can have a special role that can carry extra information
   /// about the token's formatting.
-  std::unique_ptr<TokenRole> Role;
+  /// FIXME: Make FormatToken for parsing and AnnotatedToken two different
+  /// classes and make this a unique_ptr in the AnnotatedToken class.
+  std::shared_ptr<TokenRole> Role;
 
   /// The range of the whitespace immediately preceding the \c Token.
   SourceRange WhitespaceRange;
@@ -378,6 +442,10 @@ struct FormatToken {
   /// in it.
   SmallVector<AnnotatedLine *, 1> Children;
 
+  // Contains all attributes related to how this token takes part
+  // in a configured macro expansion.
+  llvm::Optional<MacroExpansion> MacroCtx;
+
   bool is(tok::TokenKind Kind) const { return Tok.is(Kind); }
   bool is(TokenType TT) const { return getType() == TT; }
   bool is(const IdentifierInfo *II) const {
@@ -631,10 +699,12 @@ struct FormatToken {
                : nullptr;
   }
 
+  void copyFrom(const FormatToken &Tok) { *this = Tok; }
+
 private:
-  // Disallow copying.
+  // Only allow copying via the explicit copyFrom method.
   FormatToken(const FormatToken &) = delete;
-  void operator=(const FormatToken &) = delete;
+  FormatToken &operator=(const FormatToken &) = default;
 
   template <typename A, typename... Ts>
   bool startsSequenceInternal(A K1, Ts... Tokens) const {

diff  --git a/clang/lib/Format/MacroExpander.cpp b/clang/lib/Format/MacroExpander.cpp
new file mode 100644
index 000000000000..c00fc209deb5
--- /dev/null
+++ b/clang/lib/Format/MacroExpander.cpp
@@ -0,0 +1,225 @@
+//===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the implementation of MacroExpander, which handles macro
+/// configuration and expansion while formatting.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Macros.h"
+
+#include "Encoding.h"
+#include "FormatToken.h"
+#include "FormatTokenLexer.h"
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Format/Format.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/HeaderSearchOptions.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Lex/ModuleLoader.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/PreprocessorOptions.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace clang {
+namespace format {
+
+struct MacroExpander::Definition {
+  StringRef Name;
+  SmallVector<FormatToken *, 8> Params;
+  SmallVector<FormatToken *, 8> Body;
+
+  // Map from each argument's name to its position in the argument list.
+  // With "M(x, y) x + y":
+  //   x -> 0
+  //   y -> 1
+  llvm::StringMap<size_t> ArgMap;
+
+  bool ObjectLike = true;
+};
+
+class MacroExpander::DefinitionParser {
+public:
+  DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) {
+    assert(!Tokens.empty());
+    Current = Tokens[0];
+  }
+
+  // Parse the token stream and return the corresponding Definition object.
+  // Returns an empty definition object with a null-Name on error.
+  MacroExpander::Definition parse() {
+    if (!Current->is(tok::identifier))
+      return {};
+    Def.Name = Current->TokenText;
+    nextToken();
+    if (Current->is(tok::l_paren)) {
+      Def.ObjectLike = false;
+      if (!parseParams())
+        return {};
+    }
+    if (!parseExpansion())
+      return {};
+
+    return Def;
+  }
+
+private:
+  bool parseParams() {
+    assert(Current->is(tok::l_paren));
+    nextToken();
+    while (Current->is(tok::identifier)) {
+      Def.Params.push_back(Current);
+      Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1;
+      nextToken();
+      if (Current->isNot(tok::comma))
+        break;
+      nextToken();
+    }
+    if (Current->isNot(tok::r_paren))
+      return false;
+    nextToken();
+    return true;
+  }
+
+  bool parseExpansion() {
+    if (!Current->isOneOf(tok::equal, tok::eof))
+      return false;
+    if (Current->is(tok::equal))
+      nextToken();
+    parseTail();
+    return true;
+  }
+
+  void parseTail() {
+    while (Current->isNot(tok::eof)) {
+      Def.Body.push_back(Current);
+      nextToken();
+    }
+    Def.Body.push_back(Current);
+  }
+
+  void nextToken() {
+    if (Pos + 1 < Tokens.size())
+      ++Pos;
+    Current = Tokens[Pos];
+    Current->Finalized = true;
+  }
+
+  size_t Pos = 0;
+  FormatToken *Current = nullptr;
+  Definition Def;
+  ArrayRef<FormatToken *> Tokens;
+};
+
+MacroExpander::MacroExpander(
+    const std::vector<std::string> &Macros, clang::SourceManager &SourceMgr,
+    const FormatStyle &Style,
+    llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
+    IdentifierTable &IdentTable)
+    : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator),
+      IdentTable(IdentTable) {
+  for (const std::string &Macro : Macros) {
+    parseDefinition(Macro);
+  }
+}
+
+MacroExpander::~MacroExpander() = default;
+
+void MacroExpander::parseDefinition(const std::string &Macro) {
+  Buffers.push_back(
+      llvm::MemoryBuffer::getMemBufferCopy(Macro, "<scratch space>"));
+  clang::FileID FID =
+      SourceMgr.createFileID(SourceManager::Unowned, Buffers.back().get());
+  FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8,
+                       Allocator, IdentTable);
+  const auto Tokens = Lex.lex();
+  if (!Tokens.empty()) {
+    DefinitionParser Parser(Tokens);
+    auto Definition = Parser.parse();
+    Definitions[Definition.Name] = std::move(Definition);
+  }
+}
+
+bool MacroExpander::defined(llvm::StringRef Name) const {
+  return Definitions.find(Name) != Definitions.end();
+}
+
+bool MacroExpander::objectLike(llvm::StringRef Name) const {
+  return Definitions.find(Name)->second.ObjectLike;
+}
+
+llvm::SmallVector<FormatToken *, 8> MacroExpander::expand(FormatToken *ID,
+                                                          ArgsList Args) const {
+  assert(defined(ID->TokenText));
+  SmallVector<FormatToken *, 8> Result;
+  const Definition &Def = Definitions.find(ID->TokenText)->second;
+
+  // Expand each argument at most once.
+  llvm::StringSet<> ExpandedArgs;
+
+  // Adds the given token to Result.
+  auto pushToken = [&](FormatToken *Tok) {
+    Tok->MacroCtx->ExpandedFrom.push_back(ID);
+    Result.push_back(Tok);
+  };
+
+  // If Tok references a parameter, adds the corresponding argument to Result.
+  // Returns false if Tok does not reference a parameter.
+  auto expandArgument = [&](FormatToken *Tok) -> bool {
+    // If the current token references a parameter, expand the corresponding
+    // argument.
+    if (!Tok->is(tok::identifier) || ExpandedArgs.contains(Tok->TokenText))
+      return false;
+    ExpandedArgs.insert(Tok->TokenText);
+    auto I = Def.ArgMap.find(Tok->TokenText);
+    if (I == Def.ArgMap.end())
+      return false;
+    // If there are fewer arguments than referenced parameters, treat the
+    // parameter as empty.
+    // FIXME: Potentially fully abort the expansion instead.
+    if (I->getValue() >= Args.size())
+      return true;
+    for (FormatToken *Arg : Args[I->getValue()]) {
+      // A token can be part of a macro argument at multiple levels.
+      // For example, with "ID(x) x":
+      // in ID(ID(x)), 'x' is expanded first as argument to the inner
+      // ID, then again as argument to the outer ID. We keep the macro
+      // role the token had from the inner expansion.
+      if (!Arg->MacroCtx)
+        Arg->MacroCtx = MacroExpansion(MR_ExpandedArg);
+      pushToken(Arg);
+    }
+    return true;
+  };
+
+  // Expand the definition into Result.
+  for (FormatToken *Tok : Def.Body) {
+    if (expandArgument(Tok))
+      continue;
+    // Create a copy of the tokens from the macro body, i.e. tokens that were
+    // not provided by user code.
+    FormatToken *New = new (Allocator.Allocate()) FormatToken;
+    New->copyFrom(*Tok);
+    assert(!New->MacroCtx);
+    // Tokens that are not part of the user code are not formatted.
+    New->MacroCtx = MacroExpansion(MR_Hidden);
+    pushToken(New);
+  }
+  assert(Result.size() >= 1 && Result.back()->is(tok::eof));
+  if (Result.size() > 1) {
+    ++Result[0]->MacroCtx->StartOfExpansion;
+    ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion;
+  }
+  return Result;
+}
+
+} // namespace format
+} // namespace clang

diff  --git a/clang/lib/Format/Macros.h b/clang/lib/Format/Macros.h
new file mode 100644
index 000000000000..591ef8b5be3c
--- /dev/null
+++ b/clang/lib/Format/Macros.h
@@ -0,0 +1,141 @@
+//===--- Macros.h - Format C++ code -----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the main building blocks of macro support in
+/// clang-format.
+///
+/// In order to not violate the requirement that clang-format can format files
+/// in isolation, clang-format's macro support uses expansions users provide
+/// as part of clang-format's style configuration.
+///
+/// Macro definitions are of the form "MACRO(p1, p2)=p1 + p2", but only support
+/// one level of expansion (\see MacroExpander for a full description of what
+/// is supported).
+///
+/// As part of parsing, clang-format uses the MacroExpander to expand the
+/// spelled token streams into expanded token streams when it encounters a
+/// macro call. The UnwrappedLineParser continues to parse UnwrappedLines
+/// from the expanded token stream.
+/// After the expanded unwrapped lines are parsed, the MacroUnexpander matches
+/// the spelled token stream into unwrapped lines that best resemble the
+/// structure of the expanded unwrapped lines.
+///
+/// When formatting, clang-format formats the expanded unwrapped lines first,
+/// determining the token types. Next, it formats the spelled unwrapped lines,
+/// keeping the token types fixed, while allowing other formatting decisions
+/// to change.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_LIB_FORMAT_MACROS_H
+#define CLANG_LIB_FORMAT_MACROS_H
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "Encoding.h"
+#include "FormatToken.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+class MemoryBuffer;
+} // namespace llvm
+
+namespace clang {
+class IdentifierTable;
+class SourceManager;
+
+namespace format {
+struct FormatStyle;
+
+/// Takes a set of macro definitions as strings and allows expanding calls to
+/// those macros.
+///
+/// For example:
+/// Definition: A(x, y)=x + y
+/// Call      : A(int a = 1, 2)
+/// Expansion : int a = 1 + 2
+///
+/// Expansion does not check arity of the definition.
+/// If fewer arguments than expected are provided, the remaining parameters
+/// are considered empty:
+/// Call     : A(a)
+/// Expansion: a +
+/// If more arguments than expected are provided, they will be discarded.
+///
+/// The expander does not support:
+/// - recursive expansion
+/// - stringification
+/// - concatenation
+/// - variadic macros
+///
+/// Furthermore, only a single expansion of each macro argument is supported,
+/// so that we cannot get conflicting formatting decisions from different
+/// expansions.
+/// Definition: A(x)=x+x
+/// Call      : A(id)
+/// Expansion : id+x
+///
+class MacroExpander {
+public:
+  using ArgsList = llvm::ArrayRef<llvm::SmallVector<FormatToken *, 8>>;
+
+  /// Construct a macro expander from a set of macro definitions.
+  /// Macro definitions must be encoded as UTF-8.
+  ///
+  /// Each entry in \p Macros must conform to the following simple
+  /// macro-definition language:
+  /// <definition> ::= <id> <expansion> | <id> "(" <params> ")" <expansion>
+  /// <params>     ::= <id-list> | ""
+  /// <id-list>    ::= <id> | <id> "," <params>
+  /// <expansion>  ::= "=" <tail> | <eof>
+  /// <tail>       ::= <tok> <tail> | <eof>
+  ///
+  /// Macros that cannot be parsed will be silently discarded.
+  ///
+  MacroExpander(const std::vector<std::string> &Macros,
+                clang::SourceManager &SourceMgr, const FormatStyle &Style,
+                llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
+                IdentifierTable &IdentTable);
+  ~MacroExpander();
+
+  /// Returns whether a macro \p Name is defined.
+  bool defined(llvm::StringRef Name) const;
+
+  /// Returns whether the macro has no arguments and should not consume
+  /// subsequent parentheses.
+  bool objectLike(llvm::StringRef Name) const;
+
+  /// Returns the expanded stream of format tokens for \p ID, where
+  /// each element in \p Args is a positional argument to the macro call.
+  llvm::SmallVector<FormatToken *, 8> expand(FormatToken *ID,
+                                             ArgsList Args) const;
+
+private:
+  struct Definition;
+  class DefinitionParser;
+
+  void parseDefinition(const std::string &Macro);
+
+  clang::SourceManager &SourceMgr;
+  const FormatStyle &Style;
+  llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator;
+  IdentifierTable &IdentTable;
+  std::vector<std::unique_ptr<llvm::MemoryBuffer>> Buffers;
+  llvm::StringMap<Definition> Definitions;
+};
+
+} // namespace format
+} // namespace clang
+
+#endif

diff  --git a/clang/unittests/Format/CMakeLists.txt b/clang/unittests/Format/CMakeLists.txt
index d02734a48b7e..d0cc2cae179f 100644
--- a/clang/unittests/Format/CMakeLists.txt
+++ b/clang/unittests/Format/CMakeLists.txt
@@ -15,6 +15,7 @@ add_clang_unittest(FormatTests
   FormatTestSelective.cpp
   FormatTestTableGen.cpp
   FormatTestTextProto.cpp
+  MacroExpanderTest.cpp
   NamespaceEndCommentsFixerTest.cpp
   SortImportsTestJS.cpp
   SortImportsTestJava.cpp

diff  --git a/clang/unittests/Format/MacroExpanderTest.cpp b/clang/unittests/Format/MacroExpanderTest.cpp
new file mode 100644
index 000000000000..59c67f29bedd
--- /dev/null
+++ b/clang/unittests/Format/MacroExpanderTest.cpp
@@ -0,0 +1,187 @@
+#include "../../lib/Format/Macros.h"
+#include "TestLexer.h"
+#include "clang/Basic/FileManager.h"
+
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace format {
+
+namespace {
+
+class MacroExpanderTest : public ::testing::Test {
+public:
+  std::unique_ptr<MacroExpander>
+  create(const std::vector<std::string> &MacroDefinitions) {
+    return std::make_unique<MacroExpander>(MacroDefinitions,
+                                           Lex.SourceMgr.get(), Lex.Style,
+                                           Lex.Allocator, Lex.IdentTable);
+  }
+
+  std::string expand(MacroExpander &Macros, llvm::StringRef Name,
+                     const std::vector<std::string> &Args = {}) {
+    EXPECT_TRUE(Macros.defined(Name));
+    return text(Macros.expand(Lex.id(Name), lexArgs(Args)));
+  }
+
+  llvm::SmallVector<TokenList, 1>
+  lexArgs(const std::vector<std::string> &Args) {
+    llvm::SmallVector<TokenList, 1> Result;
+    for (const auto &Arg : Args) {
+      Result.push_back(uneof(Lex.lex(Arg)));
+    }
+    return Result;
+  }
+
+  struct MacroAttributes {
+    clang::tok::TokenKind Kind;
+    MacroRole Role;
+    unsigned Start;
+    unsigned End;
+    llvm::SmallVector<FormatToken *, 1> ExpandedFrom;
+  };
+
+  void expectAttributes(const TokenList &Tokens,
+                        const std::vector<MacroAttributes> &Attributes,
+                        const std::string &File, unsigned Line) {
+    EXPECT_EQ(Tokens.size(), Attributes.size()) << text(Tokens);
+    for (size_t I = 0, E = Tokens.size(); I != E; ++I) {
+      if (I >= Attributes.size())
+        continue;
+      std::string Context =
+          ("for token " + llvm::Twine(I) + ": " + Tokens[I]->Tok.getName() +
+           " / " + Tokens[I]->TokenText)
+              .str();
+      EXPECT_TRUE(Tokens[I]->is(Attributes[I].Kind))
+          << Context << " in " << text(Tokens) << " at " << File << ":" << Line;
+      EXPECT_EQ(Tokens[I]->MacroCtx->Role, Attributes[I].Role)
+          << Context << " in " << text(Tokens) << " at " << File << ":" << Line;
+      EXPECT_EQ(Tokens[I]->MacroCtx->StartOfExpansion, Attributes[I].Start)
+          << Context << " in " << text(Tokens) << " at " << File << ":" << Line;
+      EXPECT_EQ(Tokens[I]->MacroCtx->EndOfExpansion, Attributes[I].End)
+          << Context << " in " << text(Tokens) << " at " << File << ":" << Line;
+      EXPECT_EQ(Tokens[I]->MacroCtx->ExpandedFrom, Attributes[I].ExpandedFrom)
+          << Context << " in " << text(Tokens) << " at " << File << ":" << Line;
+    }
+  }
+
+  TestLexer Lex;
+};
+
+#define EXPECT_ATTRIBUTES(Tokens, Attributes)                                  \
+  expectAttributes(Tokens, Attributes, __FILE__, __LINE__)
+
+TEST_F(MacroExpanderTest, SkipsDefinitionOnError) {
+  auto Macros =
+      create({"A(", "B(,", "C(a,", "D(a a", "E(a, a", "F(,)", "G(a;"});
+  for (const auto *Name : {"A", "B", "C", "D", "E", "F", "G"}) {
+    EXPECT_FALSE(Macros->defined(Name)) << "for Name " << Name;
+  }
+}
+
+TEST_F(MacroExpanderTest, ExpandsWithoutArguments) {
+  auto Macros = create({
+      "A",
+      "B=b",
+      "C=c + c",
+      "D()",
+  });
+  EXPECT_TRUE(Macros->objectLike("A"));
+  EXPECT_TRUE(Macros->objectLike("B"));
+  EXPECT_TRUE(Macros->objectLike("C"));
+  EXPECT_TRUE(!Macros->objectLike("D"));
+  EXPECT_EQ("", expand(*Macros, "A"));
+  EXPECT_EQ("b", expand(*Macros, "B"));
+  EXPECT_EQ("c+c", expand(*Macros, "C"));
+  EXPECT_EQ("", expand(*Macros, "D"));
+}
+
+TEST_F(MacroExpanderTest, ExpandsWithArguments) {
+  auto Macros = create({
+      "A(x)",
+      "B(x, y)=x + y",
+  });
+  EXPECT_EQ("", expand(*Macros, "A", {"a"}));
+  EXPECT_EQ("b1+b2+b3", expand(*Macros, "B", {"b1", "b2 + b3"}));
+  EXPECT_EQ("x+", expand(*Macros, "B", {"x"}));
+}
+
+TEST_F(MacroExpanderTest, AttributizesTokens) {
+  auto Macros = create({
+      "A(x, y)={ x + y; }",
+      "B(x, y)=x + 3 + y",
+  });
+  auto *A = Lex.id("A");
+  auto AArgs = lexArgs({"a1 * a2", "a3 * a4"});
+  auto Result = Macros->expand(A, AArgs);
+  EXPECT_EQ(11U, Result.size()) << text(Result) << " / " << Result;
+  EXPECT_EQ("{a1*a2+a3*a4;}", text(Result));
+  std::vector<MacroAttributes> Attributes = {
+      {tok::l_brace, MR_Hidden, 1, 0, {A}},
+      {tok::identifier, MR_ExpandedArg, 0, 0, {A}},
+      {tok::star, MR_ExpandedArg, 0, 0, {A}},
+      {tok::identifier, MR_ExpandedArg, 0, 0, {A}},
+      {tok::plus, MR_Hidden, 0, 0, {A}},
+      {tok::identifier, MR_ExpandedArg, 0, 0, {A}},
+      {tok::star, MR_ExpandedArg, 0, 0, {A}},
+      {tok::identifier, MR_ExpandedArg, 0, 0, {A}},
+      {tok::semi, MR_Hidden, 0, 0, {A}},
+      {tok::r_brace, MR_Hidden, 0, 1, {A}},
+      {tok::eof, MR_Hidden, 0, 0, {A}},
+  };
+  EXPECT_ATTRIBUTES(Result, Attributes);
+
+  auto *B = Lex.id("B");
+  auto BArgs = lexArgs({"b1", "b2"});
+  Result = Macros->expand(B, BArgs);
+  EXPECT_EQ(6U, Result.size()) << text(Result) << " / " << Result;
+  EXPECT_EQ("b1+3+b2", text(Result));
+  Attributes = {
+      {tok::identifier, MR_ExpandedArg, 1, 0, {B}},
+      {tok::plus, MR_Hidden, 0, 0, {B}},
+      {tok::numeric_constant, MR_Hidden, 0, 0, {B}},
+      {tok::plus, MR_Hidden, 0, 0, {B}},
+      {tok::identifier, MR_ExpandedArg, 0, 1, {B}},
+      {tok::eof, MR_Hidden, 0, 0, {B}},
+  };
+  EXPECT_ATTRIBUTES(Result, Attributes);
+}
+
+TEST_F(MacroExpanderTest, RecursiveExpansion) {
+  auto Macros = create({
+      "A(x)=x",
+      "B(x)=x",
+      "C(x)=x",
+  });
+
+  auto *A = Lex.id("A");
+  auto *B = Lex.id("B");
+  auto *C = Lex.id("C");
+
+  auto Args = lexArgs({"id"});
+  auto CResult = uneof(Macros->expand(C, Args));
+  auto BResult = uneof(Macros->expand(B, CResult));
+  auto AResult = uneof(Macros->expand(A, BResult));
+
+  std::vector<MacroAttributes> Attributes = {
+      {tok::identifier, MR_ExpandedArg, 3, 3, {C, B, A}},
+  };
+  EXPECT_ATTRIBUTES(AResult, Attributes);
+}
+
+TEST_F(MacroExpanderTest, SingleExpansion) {
+  auto Macros = create({"A(x)=x+x"});
+  auto *A = Lex.id("A");
+  auto Args = lexArgs({"id"});
+  auto Result = uneof(Macros->expand(A, Args));
+  std::vector<MacroAttributes> Attributes = {
+      {tok::identifier, MR_ExpandedArg, 1, 0, {A}},
+      {tok::plus, MR_Hidden, 0, 0, {A}},
+      {tok::identifier, MR_Hidden, 0, 1, {A}},
+  };
+  EXPECT_ATTRIBUTES(Result, Attributes);
+}
+
+} // namespace
+} // namespace format
+} // namespace clang

diff  --git a/clang/unittests/Format/TestLexer.h b/clang/unittests/Format/TestLexer.h
new file mode 100644
index 000000000000..8c5eb2b029fb
--- /dev/null
+++ b/clang/unittests/Format/TestLexer.h
@@ -0,0 +1,88 @@
+//===--- TestLexer.h - Format C++ code --------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains a TestLexer to create FormatTokens from strings.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_UNITTESTS_FORMAT_TESTLEXER_H
+#define CLANG_UNITTESTS_FORMAT_TESTLEXER_H
+
+#include "../../lib/Format/FormatTokenLexer.h"
+
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
+
+#include <numeric>
+#include <ostream>
+
+namespace clang {
+namespace format {
+
+typedef llvm::SmallVector<FormatToken *, 8> TokenList;
+
+inline std::ostream &operator<<(std::ostream &Stream, const FormatToken &Tok) {
+  Stream << "(" << Tok.Tok.getName() << ", \"" << Tok.TokenText.str() << "\")";
+  return Stream;
+}
+inline std::ostream &operator<<(std::ostream &Stream, const TokenList &Tokens) {
+  Stream << "{";
+  for (size_t I = 0, E = Tokens.size(); I != E; ++I) {
+    Stream << (I > 0 ? ", " : "") << *Tokens[I];
+  }
+  Stream << "}";
+  return Stream;
+}
+
+inline TokenList uneof(const TokenList &Tokens) {
+  assert(!Tokens.empty() && Tokens.back()->is(tok::eof));
+  return TokenList(Tokens.begin(), std::prev(Tokens.end()));
+}
+
+inline std::string text(llvm::ArrayRef<FormatToken *> Tokens) {
+  return std::accumulate(Tokens.begin(), Tokens.end(), std::string(),
+                         [](const std::string &R, FormatToken *Tok) {
+                           return (R + Tok->TokenText).str();
+                         });
+}
+
+class TestLexer {
+public:
+  TestLexer() : SourceMgr("test.cpp", "") {}
+
+  TokenList lex(llvm::StringRef Code) {
+    Buffers.push_back(
+        llvm::MemoryBuffer::getMemBufferCopy(Code, "<scratch space>"));
+    clang::FileID FID = SourceMgr.get().createFileID(SourceManager::Unowned,
+                                                     Buffers.back().get());
+    FormatTokenLexer Lex(SourceMgr.get(), FID, 0, Style, Encoding, Allocator,
+                         IdentTable);
+    auto Result = Lex.lex();
+    return TokenList(Result.begin(), Result.end());
+  }
+
+  FormatToken *id(llvm::StringRef Code) {
+    auto Result = uneof(lex(Code));
+    assert(Result.size() == 1U && "Code must expand to 1 token.");
+    return Result[0];
+  }
+
+  FormatStyle Style = getLLVMStyle();
+  encoding::Encoding Encoding = encoding::Encoding_UTF8;
+  std::vector<std::unique_ptr<llvm::MemoryBuffer>> Buffers;
+  clang::SourceManagerForFile SourceMgr;
+  llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
+  IdentifierTable IdentTable;
+};
+
+} // namespace format
+} // namespace clang
+
+#endif // CLANG_UNITTESTS_FORMAT_TESTLEXER_H


        


More information about the cfe-commits mailing list