r364236 - [Syntax] Do not glue multiple empty PP expansions to a single mapping
Ilya Biryukov via cfe-commits
cfe-commits at lists.llvm.org
Mon Jun 24 14:39:52 PDT 2019
Author: ibiryukov
Date: Mon Jun 24 14:39:51 2019
New Revision: 364236
URL: http://llvm.org/viewvc/llvm-project?rev=364236&view=rev
Log:
[Syntax] Do not glue multiple empty PP expansions to a single mapping
Summary:
This change makes sure we have a single mapping for each macro expansion,
even if the result of expansion was empty.
To achieve that, we take information from PPCallbacks::MacroExpands into
account. Previously we relied only on source locations of expanded tokens.
Reviewers: sammccall
Reviewed By: sammccall
Subscribers: cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D62953
Modified:
cfe/trunk/include/clang/Tooling/Syntax/Tokens.h
cfe/trunk/lib/Tooling/Syntax/Tokens.cpp
cfe/trunk/unittests/Tooling/Syntax/TokensTest.cpp
Modified: cfe/trunk/include/clang/Tooling/Syntax/Tokens.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Tooling/Syntax/Tokens.h?rev=364236&r1=364235&r2=364236&view=diff
==============================================================================
--- cfe/trunk/include/clang/Tooling/Syntax/Tokens.h (original)
+++ cfe/trunk/include/clang/Tooling/Syntax/Tokens.h Mon Jun 24 14:39:51 2019
@@ -160,6 +160,7 @@ llvm::raw_ostream &operator<<(llvm::raw_
/// the spelled tokens of a file using the tokenize() helper.
///
/// FIXME: allow to map from spelled to expanded tokens when use-case shows up.
+/// FIXME: allow mappings into macro arguments.
class TokenBuffer {
public:
TokenBuffer(const SourceManager &SourceMgr) : SourceMgr(&SourceMgr) {}
@@ -227,6 +228,8 @@ public:
/// #pragma, etc.
llvm::ArrayRef<syntax::Token> spelledTokens(FileID FID) const;
+ const SourceManager &sourceManager() const { return *SourceMgr; }
+
std::string dumpForTests() const;
private:
@@ -310,9 +313,32 @@ public:
LLVM_NODISCARD TokenBuffer consume() &&;
private:
+ /// Maps from a start to an end spelling location of transformations
+ /// performed by the preprocessor. These include:
+ /// 1. range from '#' to the last token in the line for PP directives,
+ /// 2. macro name and arguments for macro expansions.
+ /// Note that we record only top-level macro expansions, intermediate
+ /// expansions (e.g. inside macro arguments) are ignored.
+ ///
+ /// Used to find correct boundaries of macro calls and directives when
+ /// building mappings from spelled to expanded tokens.
+ ///
+ /// Logically, at each point of the preprocessor execution there is a stack of
+ /// macro expansions being processed and we could use it to recover the
+ /// location information we need. However, the public preprocessor API only
+ /// exposes the points when macro expansions start (when we push a macro onto
+ /// the stack) and not when they end (when we pop a macro from the stack).
+ /// To workaround this limitation, we rely on source location information
+ /// stored in this map.
+ using PPExpansions = llvm::DenseMap</*SourceLocation*/ int, SourceLocation>;
class Builder;
+ class CollectPPExpansions;
+
std::vector<syntax::Token> Expanded;
+ // FIXME: we only store macro expansions, also add directives(#pragma, etc.)
+ PPExpansions Expansions;
Preprocessor &PP;
+ CollectPPExpansions *Collector;
};
} // namespace syntax
Modified: cfe/trunk/lib/Tooling/Syntax/Tokens.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Tooling/Syntax/Tokens.cpp?rev=364236&r1=364235&r2=364236&view=diff
==============================================================================
--- cfe/trunk/lib/Tooling/Syntax/Tokens.cpp (original)
+++ cfe/trunk/lib/Tooling/Syntax/Tokens.cpp Mon Jun 24 14:39:51 2019
@@ -14,6 +14,7 @@
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"
+#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/Token.h"
#include "llvm/ADT/ArrayRef.h"
@@ -70,8 +71,8 @@ llvm::raw_ostream &syntax::operator<<(ll
FileRange::FileRange(FileID File, unsigned BeginOffset, unsigned EndOffset)
: File(File), Begin(BeginOffset), End(EndOffset) {
- assert(File.isValid());
- assert(BeginOffset <= EndOffset);
+ assert(File.isValid());
+ assert(BeginOffset <= EndOffset);
}
FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
@@ -252,6 +253,39 @@ std::vector<syntax::Token> syntax::token
return Tokens;
}
+/// Records information reqired to construct mappings for the token buffer that
+/// we are collecting.
+class TokenCollector::CollectPPExpansions : public PPCallbacks {
+public:
+ CollectPPExpansions(TokenCollector &C) : Collector(&C) {}
+
+ /// Disabled instance will stop reporting anything to TokenCollector.
+ /// This ensures that uses of the preprocessor after TokenCollector::consume()
+ /// is called do not access the (possibly invalid) collector instance.
+ void disable() { Collector = nullptr; }
+
+ void MacroExpands(const clang::Token &MacroNameTok, const MacroDefinition &MD,
+ SourceRange Range, const MacroArgs *Args) override {
+ if (!Collector)
+ return;
+ // Only record top-level expansions, not those where:
+ // - the macro use is inside a macro body,
+ // - the macro appears in an argument to another macro.
+ if (!MacroNameTok.getLocation().isFileID() ||
+ (LastExpansionEnd.isValid() &&
+ Collector->PP.getSourceManager().isBeforeInTranslationUnit(
+ Range.getBegin(), LastExpansionEnd)))
+ return;
+ Collector->Expansions[Range.getBegin().getRawEncoding()] = Range.getEnd();
+ LastExpansionEnd = Range.getEnd();
+ }
+ // FIXME: handle directives like #pragma, #include, etc.
+private:
+ TokenCollector *Collector;
+ /// Used to detect recursive macro expansions.
+ SourceLocation LastExpansionEnd;
+};
+
/// Fills in the TokenBuffer by tracing the run of a preprocessor. The
/// implementation tracks the tokens, macro expansions and directives coming
/// from the preprocessor and:
@@ -279,15 +313,21 @@ TokenCollector::TokenCollector(Preproces
);
Expanded.push_back(syntax::Token(T));
});
+ // And locations of macro calls, to properly recover boundaries of those in
+ // case of empty expansions.
+ auto CB = llvm::make_unique<CollectPPExpansions>(*this);
+ this->Collector = CB.get();
+ PP.addPPCallbacks(std::move(CB));
}
/// Builds mappings and spelled tokens in the TokenBuffer based on the expanded
/// token stream.
class TokenCollector::Builder {
public:
- Builder(std::vector<syntax::Token> Expanded, const SourceManager &SM,
- const LangOptions &LangOpts)
- : Result(SM), SM(SM), LangOpts(LangOpts) {
+ Builder(std::vector<syntax::Token> Expanded, PPExpansions CollectedExpansions,
+ const SourceManager &SM, const LangOptions &LangOpts)
+ : Result(SM), CollectedExpansions(std::move(CollectedExpansions)), SM(SM),
+ LangOpts(LangOpts) {
Result.ExpandedTokens = std::move(Expanded);
}
@@ -296,6 +336,9 @@ public:
// Walk over expanded tokens and spelled tokens in parallel, building the
// mappings between those using source locations.
+ // To correctly recover empty macro expansions, we also take locations
+ // reported to PPCallbacks::MacroExpands into account as we do not have any
+ // expanded tokens with source locations to guide us.
// The 'eof' token is special, it is not part of spelled token stream. We
// handle it separately at the end.
@@ -356,22 +399,17 @@ private:
fillGapUntil(File, SpelledRange.getBegin(), I);
- TokenBuffer::Mapping M;
- // Skip the spelled macro tokens.
- std::tie(M.BeginSpelled, M.EndSpelled) =
- consumeSpelledUntil(File, SpelledRange.getEnd().getLocWithOffset(1));
// Skip all expanded tokens from the same macro expansion.
- M.BeginExpanded = I;
+ unsigned BeginExpanded = I;
for (; I + 1 < Result.ExpandedTokens.size(); ++I) {
auto NextL = Result.ExpandedTokens[I + 1].location();
if (!NextL.isMacroID() ||
SM.getExpansionLoc(NextL) != SpelledRange.getBegin())
break;
}
- M.EndExpanded = I + 1;
-
- // Add a resulting mapping.
- File.Mappings.push_back(M);
+ unsigned EndExpanded = I + 1;
+ consumeMapping(File, SM.getFileOffset(SpelledRange.getEnd()), BeginExpanded,
+ EndExpanded, NextSpelled[FID]);
}
/// Initializes TokenBuffer::Files and fills spelled tokens and expanded
@@ -393,69 +431,108 @@ private:
}
}
- /// Consumed spelled tokens until location L is reached (token starting at L
- /// is not included). Returns the indicies of the consumed range.
- std::pair</*Begin*/ unsigned, /*End*/ unsigned>
- consumeSpelledUntil(TokenBuffer::MarkedFile &File, SourceLocation L) {
- assert(L.isFileID());
- FileID FID;
- unsigned Offset;
- std::tie(FID, Offset) = SM.getDecomposedLoc(L);
+ void consumeEmptyMapping(TokenBuffer::MarkedFile &File, unsigned EndOffset,
+ unsigned ExpandedIndex, unsigned &SpelledIndex) {
+ consumeMapping(File, EndOffset, ExpandedIndex, ExpandedIndex, SpelledIndex);
+ }
- // (!) we update the index in-place.
- unsigned &SpelledI = NextSpelled[FID];
- unsigned Before = SpelledI;
- for (; SpelledI < File.SpelledTokens.size() &&
- SM.getFileOffset(File.SpelledTokens[SpelledI].location()) < Offset;
- ++SpelledI) {
- }
- return std::make_pair(Before, /*After*/ SpelledI);
- };
+ /// Consumes spelled tokens that form a macro expansion and adds a entry to
+ /// the resulting token buffer.
+ /// (!) SpelledIndex is updated in-place.
+ void consumeMapping(TokenBuffer::MarkedFile &File, unsigned EndOffset,
+ unsigned BeginExpanded, unsigned EndExpanded,
+ unsigned &SpelledIndex) {
+ // We need to record this mapping before continuing.
+ unsigned MappingBegin = SpelledIndex;
+ ++SpelledIndex;
+
+ bool HitMapping =
+ tryConsumeSpelledUntil(File, EndOffset + 1, SpelledIndex).hasValue();
+ (void)HitMapping;
+ assert(!HitMapping && "recursive macro expansion?");
+
+ TokenBuffer::Mapping M;
+ M.BeginExpanded = BeginExpanded;
+ M.EndExpanded = EndExpanded;
+ M.BeginSpelled = MappingBegin;
+ M.EndSpelled = SpelledIndex;
+
+ File.Mappings.push_back(M);
+ }
/// Consumes spelled tokens until location \p L is reached and adds a mapping
/// covering the consumed tokens. The mapping will point to an empty expanded
/// range at position \p ExpandedIndex.
void fillGapUntil(TokenBuffer::MarkedFile &File, SourceLocation L,
unsigned ExpandedIndex) {
- unsigned BeginSpelledGap, EndSpelledGap;
- std::tie(BeginSpelledGap, EndSpelledGap) = consumeSpelledUntil(File, L);
- if (BeginSpelledGap == EndSpelledGap)
- return; // No gap.
- TokenBuffer::Mapping M;
- M.BeginSpelled = BeginSpelledGap;
- M.EndSpelled = EndSpelledGap;
- M.BeginExpanded = M.EndExpanded = ExpandedIndex;
- File.Mappings.push_back(M);
+ assert(L.isFileID());
+ FileID FID;
+ unsigned Offset;
+ std::tie(FID, Offset) = SM.getDecomposedLoc(L);
+
+ unsigned &SpelledIndex = NextSpelled[FID];
+ unsigned MappingBegin = SpelledIndex;
+ while (true) {
+ auto EndLoc = tryConsumeSpelledUntil(File, Offset, SpelledIndex);
+ if (SpelledIndex != MappingBegin) {
+ TokenBuffer::Mapping M;
+ M.BeginSpelled = MappingBegin;
+ M.EndSpelled = SpelledIndex;
+ M.BeginExpanded = M.EndExpanded = ExpandedIndex;
+ File.Mappings.push_back(M);
+ }
+ if (!EndLoc)
+ break;
+ consumeEmptyMapping(File, SM.getFileOffset(*EndLoc), ExpandedIndex,
+ SpelledIndex);
+
+ MappingBegin = SpelledIndex;
+ }
};
+ /// Consumes spelled tokens until it reaches Offset or a mapping boundary,
+ /// i.e. a name of a macro expansion or the start '#' token of a PP directive.
+ /// (!) NextSpelled is updated in place.
+ ///
+ /// returns None if \p Offset was reached, otherwise returns the end location
+ /// of a mapping that starts at \p NextSpelled.
+ llvm::Optional<SourceLocation>
+ tryConsumeSpelledUntil(TokenBuffer::MarkedFile &File, unsigned Offset,
+ unsigned &NextSpelled) {
+ for (; NextSpelled < File.SpelledTokens.size(); ++NextSpelled) {
+ auto L = File.SpelledTokens[NextSpelled].location();
+ if (Offset <= SM.getFileOffset(L))
+ return llvm::None; // reached the offset we are looking for.
+ auto Mapping = CollectedExpansions.find(L.getRawEncoding());
+ if (Mapping != CollectedExpansions.end())
+ return Mapping->second; // found a mapping before the offset.
+ }
+ return llvm::None; // no more tokens, we "reached" the offset.
+ }
+
/// Adds empty mappings for unconsumed spelled tokens at the end of each file.
void fillGapsAtEndOfFiles() {
for (auto &F : Result.Files) {
- unsigned Next = NextSpelled[F.first];
- if (F.second.SpelledTokens.size() == Next)
- continue; // All spelled tokens are accounted for.
-
- // Record a mapping for the gap at the end of the spelled tokens.
- TokenBuffer::Mapping M;
- M.BeginSpelled = Next;
- M.EndSpelled = F.second.SpelledTokens.size();
- M.BeginExpanded = F.second.EndExpanded;
- M.EndExpanded = F.second.EndExpanded;
-
- F.second.Mappings.push_back(M);
+ if (F.second.SpelledTokens.empty())
+ continue;
+ fillGapUntil(F.second, F.second.SpelledTokens.back().endLocation(),
+ F.second.EndExpanded);
}
}
TokenBuffer Result;
/// For each file, a position of the next spelled token we will consume.
llvm::DenseMap<FileID, unsigned> NextSpelled;
+ PPExpansions CollectedExpansions;
const SourceManager &SM;
const LangOptions &LangOpts;
};
TokenBuffer TokenCollector::consume() && {
PP.setTokenWatcher(nullptr);
- return Builder(std::move(Expanded), PP.getSourceManager(), PP.getLangOpts())
+ Collector->disable();
+ return Builder(std::move(Expanded), std::move(Expansions),
+ PP.getSourceManager(), PP.getLangOpts())
.build();
}
Modified: cfe/trunk/unittests/Tooling/Syntax/TokensTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Tooling/Syntax/TokensTest.cpp?rev=364236&r1=364235&r2=364236&view=diff
==============================================================================
--- cfe/trunk/unittests/Tooling/Syntax/TokensTest.cpp (original)
+++ cfe/trunk/unittests/Tooling/Syntax/TokensTest.cpp Mon Jun 24 14:39:51 2019
@@ -424,6 +424,7 @@ file './input.cpp'
" ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n"
" ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"},
// Empty macro replacement.
+ // FIXME: the #define directives should not be glued together.
{R"cpp(
#define EMPTY
#define EMPTY_FUNC(X)
@@ -436,7 +437,9 @@ file './input.cpp'
spelled tokens:
# define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 )
mappings:
- ['#'_0, '<eof>'_18) => ['<eof>'_0, '<eof>'_0)
+ ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0)
+ ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0)
+ ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0)
)"},
// File ends with a macro replacement.
{R"cpp(
More information about the cfe-commits
mailing list