[clang] f7e8be7 - Skip escaped newlines before checking for whitespace in Lexer::getRawToken. (#117548)
via cfe-commits
cfe-commits at lists.llvm.org
Thu Dec 5 06:37:50 PST 2024
Author: Samira Bazuzi
Date: 2024-12-05T09:37:46-05:00
New Revision: f7e8be7c66b53a126c8cba9ac81b5b77d873aa1e
URL: https://github.com/llvm/llvm-project/commit/f7e8be7c66b53a126c8cba9ac81b5b77d873aa1e
DIFF: https://github.com/llvm/llvm-project/commit/f7e8be7c66b53a126c8cba9ac81b5b77d873aa1e.diff
LOG: Skip escaped newlines before checking for whitespace in Lexer::getRawToken. (#117548)
The Lexer used in getRawToken is not told to keep whitespace, so when it
skips over escaped newlines, it also skips any whitespace that follows them,
regardless of getRawToken's IgnoreWhiteSpace parameter.
Instead of letting this case fall through to lexing, skip over any escaped
newlines first and then check for whitespace.
Added:
Modified:
clang/lib/Lex/Lexer.cpp
clang/unittests/Lex/LexerTest.cpp
Removed:
################################################################################
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index e58c8bc72ae5b3..72364500a48f9f 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -527,7 +527,7 @@ bool Lexer::getRawToken(SourceLocation Loc, Token &Result,
const char *StrData = Buffer.data()+LocInfo.second;
- if (!IgnoreWhiteSpace && isWhitespace(StrData[0]))
+ if (!IgnoreWhiteSpace && isWhitespace(SkipEscapedNewLines(StrData)[0]))
return true;
// Create a lexer starting at the beginning of this token.
diff --git a/clang/unittests/Lex/LexerTest.cpp b/clang/unittests/Lex/LexerTest.cpp
index 47aa2c131a304d..aead7fb899d0a8 100644
--- a/clang/unittests/Lex/LexerTest.cpp
+++ b/clang/unittests/Lex/LexerTest.cpp
@@ -652,6 +652,38 @@ TEST_F(LexerTest, RawAndNormalLexSameForLineComments) {
EXPECT_TRUE(ToksView.empty());
}
+TEST_F(LexerTest, GetRawTokenOnEscapedNewLineChecksWhitespace) {
+ const llvm::StringLiteral Source = R"cc(
+ #define ONE \
+ 1
+
+ int i = ONE;
+ )cc";
+ std::vector<Token> Toks =
+ CheckLex(Source, {tok::kw_int, tok::identifier, tok::equal,
+ tok::numeric_constant, tok::semi});
+
+ // Set up by getting the raw token for the `1` in the macro definition.
+ const Token &OneExpanded = Toks[3];
+ Token Tok;
+ ASSERT_FALSE(
+ Lexer::getRawToken(OneExpanded.getLocation(), Tok, SourceMgr, LangOpts));
+ // The `ONE`.
+ ASSERT_EQ(Tok.getKind(), tok::raw_identifier);
+ ASSERT_FALSE(
+ Lexer::getRawToken(SourceMgr.getSpellingLoc(OneExpanded.getLocation()),
+ Tok, SourceMgr, LangOpts));
+ // The `1` in the macro definition.
+ ASSERT_EQ(Tok.getKind(), tok::numeric_constant);
+
+ // Go back 4 characters: two spaces, one newline, and the backslash.
+ SourceLocation EscapedNewLineLoc = Tok.getLocation().getLocWithOffset(-4);
+ // Expect true (=failure) because the whitespace immediately after the
+ // escaped newline is not ignored.
+ EXPECT_TRUE(Lexer::getRawToken(EscapedNewLineLoc, Tok, SourceMgr, LangOpts,
+ /*IgnoreWhiteSpace=*/false));
+}
+
TEST(LexerPreambleTest, PreambleBounds) {
std::vector<std::string> Cases = {
R"cc([[
More information about the cfe-commits mailing list