[clang] [clang][dep-scan] Resolve lexer crash from a permutation of invalid tokens (PR #142452)
Cyndy Ishida via cfe-commits
cfe-commits at lists.llvm.org
Thu Jun 5 18:00:34 PDT 2025
https://github.com/cyndyishida updated https://github.com/llvm/llvm-project/pull/142452
>From 6b403ac7fab68feef8f1a72d8e4ab67ee2ef3c57 Mon Sep 17 00:00:00 2001
From: Cyndy Ishida <cyndy_ishida at apple.com>
Date: Thu, 5 Jun 2025 17:51:53 -0700
Subject: [PATCH] [clang][dep-scan] Resolve lexer crash from a permutation of
invalid tokens
Sometimes, when a user writes invalid code, the minimization used for dependency scanning can produce a stream of tokens that is invalid at lex time. This patch protects against the case where valid (non-C++20) import directives are discovered in the middle of an invalid import declaration.
resolves: rdar://152335844
---
clang/lib/Lex/DependencyDirectivesScanner.cpp | 9 +++++++++
.../Lex/DependencyDirectivesScannerTest.cpp | 17 ++++++++++++++++-
2 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp
index 088d1cc96e3a2..4606b85d42fe7 100644
--- a/clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp
@@ -496,7 +496,15 @@ bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First,
const char *const End) {
const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset;
for (;;) {
+ // Keep a copy of the First char in case it needs to be reset.
+ const char *Previous = First;
const dependency_directives_scan::Token &Tok = lexToken(First, End);
+ if ((Tok.is(tok::hash) || Tok.is(tok::at)) &&
+ (Tok.Flags & clang::Token::StartOfLine)) {
+ CurDirToks.pop_back();
+ First = Previous;
+ return false;
+ }
if (Tok.is(tok::eof))
return reportError(
DirectiveLoc,
@@ -846,6 +854,7 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) {
if (*First == '@')
return lexAt(First, End);
+ // Handle module directives for C++20 modules.
if (*First == 'i' || *First == 'e' || *First == 'm')
return lexModule(First, End);
diff --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
index bdb5e23510118..377c066f031d3 100644
--- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
+++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
@@ -677,13 +677,28 @@ TEST(MinimizeSourceToDependencyDirectivesTest, EmptyIncludesAndImports) {
Out.data());
}
-TEST(MinimizeSourceToDependencyDirectivesTest, AtImportFailures) {
+TEST(MinimizeSourceToDependencyDirectivesTest, ImportFailures) {
SmallVector<char, 128> Out;
ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import A\n", Out));
ASSERT_FALSE(
minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out));
ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out));
+
+ ASSERT_FALSE(minimizeSourceToDependencyDirectives("import <Foo.h>\n"
+ "@import Foo;",
+ Out));
+ EXPECT_STREQ("@import Foo;\n", Out.data());
+
+ ASSERT_FALSE(
+ minimizeSourceToDependencyDirectives("import <Foo.h>\n"
+ "#import <Foo.h>\n"
+ "@;\n"
+ "#pragma clang module import Foo",
+ Out));
+ EXPECT_STREQ("#import <Foo.h>\n"
+ "#pragma clang module import Foo\n",
+ Out.data());
}
TEST(MinimizeSourceToDependencyDirectivesTest, RawStringLiteral) {
More information about the cfe-commits
mailing list