[clang] 93dc73b - [Lexer] Fix bug in `makeFileCharRange` called on split tokens.
Yitzhak Mandelbaum via cfe-commits
cfe-commits at lists.llvm.org
Wed Jul 14 07:36:53 PDT 2021
Author: Yitzhak Mandelbaum
Date: 2021-07-14T14:36:31Z
New Revision: 93dc73b1e0f31c712e5b8bbac14491ce55c414ad
URL: https://github.com/llvm/llvm-project/commit/93dc73b1e0f31c712e5b8bbac14491ce55c414ad
DIFF: https://github.com/llvm/llvm-project/commit/93dc73b1e0f31c712e5b8bbac14491ce55c414ad.diff
LOG: [Lexer] Fix bug in `makeFileCharRange` called on split tokens.
When the end loc of the specified range is a split token, `makeFileCharRange`
does not process it correctly. This patch adds proper support for split tokens.
Differential Revision: https://reviews.llvm.org/D105365
Added:
Modified:
clang/lib/Lex/Lexer.cpp
clang/unittests/Lex/LexerTest.cpp
Removed:
################################################################################
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index cb2b19b59c4ec..2cc4cae533d07 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -877,6 +877,14 @@ static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range,
return CharSourceRange::getCharRange(Begin, End);
}
+// Assumes that `Loc` is in an expansion.
+static bool isInExpansionTokenRange(const SourceLocation Loc,
+ const SourceManager &SM) {
+ return SM.getSLocEntry(SM.getFileID(Loc))
+ .getExpansion()
+ .isExpansionTokenRange();
+}
+
CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
const SourceManager &SM,
const LangOptions &LangOpts) {
@@ -896,10 +904,12 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
}
if (Begin.isFileID() && End.isMacroID()) {
- if ((Range.isTokenRange() && !isAtEndOfMacroExpansion(End, SM, LangOpts,
- &End)) ||
- (Range.isCharRange() && !isAtStartOfMacroExpansion(End, SM, LangOpts,
- &End)))
+ if (Range.isTokenRange()) {
+ if (!isAtEndOfMacroExpansion(End, SM, LangOpts, &End))
+ return {};
+ // Use the *original* end, not the expanded one in `End`.
+ Range.setTokenRange(isInExpansionTokenRange(Range.getEnd(), SM));
+ } else if (!isAtStartOfMacroExpansion(End, SM, LangOpts, &End))
return {};
Range.setEnd(End);
return makeRangeFromFileLocs(Range, SM, LangOpts);
@@ -914,6 +924,9 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range,
&MacroEnd)))) {
Range.setBegin(MacroBegin);
Range.setEnd(MacroEnd);
+ // Use the *original* `End`, not the expanded one in `MacroEnd`.
+ if (Range.isTokenRange())
+ Range.setTokenRange(isInExpansionTokenRange(End, SM));
return makeRangeFromFileLocs(Range, SM, LangOpts);
}
diff --git a/clang/unittests/Lex/LexerTest.cpp b/clang/unittests/Lex/LexerTest.cpp
index 4cdabe042cc83..319c63f6a50ba 100644
--- a/clang/unittests/Lex/LexerTest.cpp
+++ b/clang/unittests/Lex/LexerTest.cpp
@@ -25,6 +25,7 @@
#include "clang/Lex/PreprocessorOptions.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include <memory>
#include <vector>
namespace {
@@ -65,7 +66,7 @@ class LexerTest : public ::testing::Test {
std::vector<Token> Lex(StringRef Source) {
TrivialModuleLoader ModLoader;
- auto PP = CreatePP(Source, ModLoader);
+ PP = CreatePP(Source, ModLoader);
std::vector<Token> toks;
while (1) {
@@ -109,6 +110,7 @@ class LexerTest : public ::testing::Test {
LangOptions LangOpts;
std::shared_ptr<TargetOptions> TargetOpts;
IntrusiveRefCntPtr<TargetInfo> Target;
+ std::unique_ptr<Preprocessor> PP;
};
TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) {
@@ -264,12 +266,14 @@ TEST_F(LexerTest, GetSourceTextExpandsRecursively) {
TEST_F(LexerTest, LexAPI) {
std::vector<tok::TokenKind> ExpectedTokens;
+ // Line 1 (after the #defines)
ExpectedTokens.push_back(tok::l_square);
ExpectedTokens.push_back(tok::identifier);
ExpectedTokens.push_back(tok::r_square);
ExpectedTokens.push_back(tok::l_square);
ExpectedTokens.push_back(tok::identifier);
ExpectedTokens.push_back(tok::r_square);
+ // Line 2
ExpectedTokens.push_back(tok::identifier);
ExpectedTokens.push_back(tok::identifier);
ExpectedTokens.push_back(tok::identifier);
@@ -357,6 +361,65 @@ TEST_F(LexerTest, LexAPI) {
EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts));
}
+TEST_F(LexerTest, HandlesSplitTokens) {
+ std::vector<tok::TokenKind> ExpectedTokens;
+ // Line 1 (after the #defines)
+ ExpectedTokens.push_back(tok::identifier);
+ ExpectedTokens.push_back(tok::less);
+ ExpectedTokens.push_back(tok::identifier);
+ ExpectedTokens.push_back(tok::less);
+ ExpectedTokens.push_back(tok::greatergreater);
+ // Line 2
+ ExpectedTokens.push_back(tok::identifier);
+ ExpectedTokens.push_back(tok::less);
+ ExpectedTokens.push_back(tok::identifier);
+ ExpectedTokens.push_back(tok::less);
+ ExpectedTokens.push_back(tok::greatergreater);
+
+ std::vector<Token> toks = CheckLex("#define TY ty\n"
+ "#define RANGLE ty<ty<>>\n"
+ "TY<ty<>>\n"
+ "RANGLE",
+ ExpectedTokens);
+
+ SourceLocation outerTyLoc = toks[0].getLocation();
+ SourceLocation innerTyLoc = toks[2].getLocation();
+ SourceLocation gtgtLoc = toks[4].getLocation();
+ // Split the token to simulate the action of the parser and force creation of
+ // an `ExpansionTokenRange`.
+ SourceLocation rangleLoc = PP->SplitToken(gtgtLoc, 1);
+
+ // Verify that it only captures the first greater-than and not the second one.
+ CharSourceRange range = Lexer::makeFileCharRange(
+ CharSourceRange::getTokenRange(innerTyLoc, rangleLoc), SourceMgr,
+ LangOpts);
+ EXPECT_TRUE(range.isCharRange());
+ EXPECT_EQ(range.getAsRange(),
+ SourceRange(innerTyLoc, gtgtLoc.getLocWithOffset(1)));
+
+ // Verify case where range begins in a macro expansion.
+ range = Lexer::makeFileCharRange(
+ CharSourceRange::getTokenRange(outerTyLoc, rangleLoc), SourceMgr,
+ LangOpts);
+ EXPECT_TRUE(range.isCharRange());
+ EXPECT_EQ(range.getAsRange(),
+ SourceRange(SourceMgr.getExpansionLoc(outerTyLoc),
+ gtgtLoc.getLocWithOffset(1)));
+
+ SourceLocation macroInnerTyLoc = toks[7].getLocation();
+ SourceLocation macroGtgtLoc = toks[9].getLocation();
+ // Split the token to simulate the action of the parser and force creation of
+ // an `ExpansionTokenRange`.
+ SourceLocation macroRAngleLoc = PP->SplitToken(macroGtgtLoc, 1);
+
+ // Verify that it fails (because it only captures the first greater-than and
+ // not the second one, so it doesn't span the entire macro expansion).
+ range = Lexer::makeFileCharRange(
+ CharSourceRange::getTokenRange(macroInnerTyLoc, macroRAngleLoc),
+ SourceMgr, LangOpts);
+ EXPECT_TRUE(range.isInvalid());
+}
+
TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) {
std::vector<Token> toks =
Lex("#define helper1 0\n"
More information about the cfe-commits
mailing list