[clang] [clang][dep-scan] Resolve lexer crash from a permutation of invalid tokens (PR #142452)
Cyndy Ishida via cfe-commits
cfe-commits at lists.llvm.org
Mon Jun 2 11:14:16 PDT 2025
https://github.com/cyndyishida created https://github.com/llvm/llvm-project/pull/142452
Sometimes, when a user writes invalid code, the minimization used for scanning can create a stream of tokens that is invalid at lex time. This patch protects against the case where there are valid (non-C++20) import directives discovered in the middle of an invalid `import` declaration.
resolves: rdar://152335844
>From 3b2f3d98af5acd5923e795cece7defe835328181 Mon Sep 17 00:00:00 2001
From: Cyndy Ishida <cyndy_ishida at apple.com>
Date: Mon, 2 Jun 2025 11:09:30 -0700
Subject: [PATCH] [clang][dep-scan] Resolve lexer crash from a permutation of
invalid tokens
Sometimes when a user writes invalid code, the minimization used for
scanning can create a stream of tokens that is invalid at lex time.
This patch protects against the case where there are valid import directives
discovered in the middle of an invalid `import` declaration.
resolves: rdar://152335844
---
clang/include/clang/Basic/DiagnosticLexKinds.td | 3 ++-
clang/lib/Lex/DependencyDirectivesScanner.cpp | 5 +++++
...urce_to_dependency_directives_invalid_import.m | 6 ++++++
.../Lex/DependencyDirectivesScannerTest.cpp | 15 ++++++++++++---
4 files changed, 25 insertions(+), 4 deletions(-)
create mode 100644 clang/test/Lexer/minimize_source_to_dependency_directives_invalid_import.m
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index 723f5d48b4f5f..f9c3e1608a2b0 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -1029,7 +1029,8 @@ def err_dep_source_scanner_missing_semi_after_at_import : Error<
"could not find ';' after @import">;
def err_dep_source_scanner_unexpected_tokens_at_import : Error<
"unexpected extra tokens at end of @import declaration">;
-
+def err_dep_source_scanner_unexpected_tokens_in_directive_body
+ : Error<"unexpected extra tokens inside declaration">;
}
def err_pp_double_begin_pragma_unsafe_buffer_usage :
diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp
index 088d1cc96e3a2..1310077ede614 100644
--- a/clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp
@@ -503,6 +503,10 @@ bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First,
diag::err_dep_source_scanner_missing_semi_after_at_import);
if (Tok.is(tok::semi))
break;
+ if (Tok.is(tok::hash) || Tok.is(tok::at))
+ return reportError(
+ First,
+ diag::err_dep_source_scanner_unexpected_tokens_in_directive_body);
}
pushDirective(Kind);
skipWhitespace(First, End);
@@ -846,6 +850,7 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) {
if (*First == '@')
return lexAt(First, End);
+ // Handle module directives for C++20 modules.
if (*First == 'i' || *First == 'e' || *First == 'm')
return lexModule(First, End);
diff --git a/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_import.m b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_import.m
new file mode 100644
index 0000000000000..eb963301807b5
--- /dev/null
+++ b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_import.m
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 -verify -print-dependency-directives-minimized-source %s 2>&1
+
+import <invalid.h>
+#import "invalid.h" // expected-error {{unexpected extra tokens inside declaration}}
+@;
+#pragma clang module import invalid
diff --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
index bdb5e23510118..90e37bba24ee7 100644
--- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
+++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
@@ -33,7 +33,7 @@ static bool minimizeSourceToDependencyDirectives(
return false;
}
-
+// Returns false on successful minimization.
static bool minimizeSourceToDependencyDirectives(StringRef Input,
SmallVectorImpl<char> &Out) {
SmallVector<dependency_directives_scan::Token, 16> Tokens;
@@ -677,13 +677,22 @@ TEST(MinimizeSourceToDependencyDirectivesTest, EmptyIncludesAndImports) {
Out.data());
}
-TEST(MinimizeSourceToDependencyDirectivesTest, AtImportFailures) {
+TEST(MinimizeSourceToDependencyDirectivesTest, ImportFailures) {
SmallVector<char, 128> Out;
- ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import A\n", Out));
+ // Minimization can recover, and let the real compilation fail.
ASSERT_FALSE(
minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out));
ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out));
+
+ ASSERT_FALSE(minimizeSourceToDependencyDirectives("import\n", Out));
+ EXPECT_STREQ("<TokBeforeEOF>\n", Out.data());
+
+ // Minimization cannot recover.
+ ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import A\n", Out));
+ ASSERT_TRUE(minimizeSourceToDependencyDirectives("import <Foo.h>\n"
+ "#include \"Foo.h\"",
+ Out));
}
TEST(MinimizeSourceToDependencyDirectivesTest, RawStringLiteral) {
More information about the cfe-commits
mailing list