[clang] 897b030 - [clang][dep-scan] Resolve lexer crash from a permutation of invalid tokens (#142452)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Jun 6 13:45:20 PDT 2025
Author: Cyndy Ishida
Date: 2025-06-06T13:45:16-07:00
New Revision: 897b0301d2e2ff28d3976fe95b64be5a85815908
URL: https://github.com/llvm/llvm-project/commit/897b0301d2e2ff28d3976fe95b64be5a85815908
DIFF: https://github.com/llvm/llvm-project/commit/897b0301d2e2ff28d3976fe95b64be5a85815908.diff
LOG: [clang][dep-scan] Resolve lexer crash from a permutation of invalid tokens (#142452)
Sometimes, when a user writes invalid code, the minimization used for
scanning can create a stream of tokens that is invalid at lex time. This
patch protects against the case where there are valid (non-C++20) import
directives discovered in the middle of an invalid `import` declaration.
Mostly authored by: @akyrtzi
resolves: rdar://152335844
Added:
Modified:
clang/lib/Lex/DependencyDirectivesScanner.cpp
clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
Removed:
################################################################################
diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp
index 088d1cc96e3a2..4606b85d42fe7 100644
--- a/clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp
@@ -496,7 +496,15 @@ bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First,
const char *const End) {
const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset;
for (;;) {
+ // Keep a copy of the First char in case it needs to be reset.
+ const char *Previous = First;
const dependency_directives_scan::Token &Tok = lexToken(First, End);
+ if ((Tok.is(tok::hash) || Tok.is(tok::at)) &&
+ (Tok.Flags & clang::Token::StartOfLine)) {
+ CurDirToks.pop_back();
+ First = Previous;
+ return false;
+ }
if (Tok.is(tok::eof))
return reportError(
DirectiveLoc,
@@ -846,6 +854,7 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) {
if (*First == '@')
return lexAt(First, End);
+ // Handle module directives for C++20 modules.
if (*First == 'i' || *First == 'e' || *First == 'm')
return lexModule(First, End);
diff --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
index bdb5e23510118..377c066f031d3 100644
--- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
+++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
@@ -677,13 +677,28 @@ TEST(MinimizeSourceToDependencyDirectivesTest, EmptyIncludesAndImports) {
Out.data());
}
-TEST(MinimizeSourceToDependencyDirectivesTest, AtImportFailures) {
+TEST(MinimizeSourceToDependencyDirectivesTest, ImportFailures) {
SmallVector<char, 128> Out;
ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import A\n", Out));
ASSERT_FALSE(
minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out));
ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out));
+
+ ASSERT_FALSE(minimizeSourceToDependencyDirectives("import <Foo.h>\n"
+ "@import Foo;",
+ Out));
+ EXPECT_STREQ("@import Foo;\n", Out.data());
+
+ ASSERT_FALSE(
+ minimizeSourceToDependencyDirectives("import <Foo.h>\n"
+ "#import <Foo.h>\n"
+ "@;\n"
+ "#pragma clang module import Foo",
+ Out));
+ EXPECT_STREQ("#import <Foo.h>\n"
+ "#pragma clang module import Foo\n",
+ Out.data());
}
TEST(MinimizeSourceToDependencyDirectivesTest, RawStringLiteral) {
More information about the cfe-commits
mailing list