[clang] [clang][dep-scan] Resolve lexer crash from a permutation of invalid tokens (PR #142452)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Jun 2 11:14:54 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Cyndy Ishida (cyndyishida)
<details>
<summary>Changes</summary>
Sometimes, when a user writes invalid code, the minimization used for scanning can produce a stream of tokens that is invalid at lex time. This patch protects against the case where valid (non-C++20) import directives are discovered in the middle of an invalid `import` declaration.
resolves: rdar://152335844
---
Full diff: https://github.com/llvm/llvm-project/pull/142452.diff
4 Files Affected:
- (modified) clang/include/clang/Basic/DiagnosticLexKinds.td (+2-1)
- (modified) clang/lib/Lex/DependencyDirectivesScanner.cpp (+5)
- (added) clang/test/Lexer/minimize_source_to_dependency_directives_invalid_import.m (+6)
- (modified) clang/unittests/Lex/DependencyDirectivesScannerTest.cpp (+12-3)
``````````diff
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index 723f5d48b4f5f..f9c3e1608a2b0 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -1029,7 +1029,8 @@ def err_dep_source_scanner_missing_semi_after_at_import : Error<
"could not find ';' after @import">;
def err_dep_source_scanner_unexpected_tokens_at_import : Error<
"unexpected extra tokens at end of @import declaration">;
-
+def err_dep_source_scanner_unexpected_tokens_in_directive_body
+ : Error<"unexpected extra tokens inside declaration">;
}
def err_pp_double_begin_pragma_unsafe_buffer_usage :
diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp
index 088d1cc96e3a2..1310077ede614 100644
--- a/clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp
@@ -503,6 +503,10 @@ bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First,
diag::err_dep_source_scanner_missing_semi_after_at_import);
if (Tok.is(tok::semi))
break;
+ if (Tok.is(tok::hash) || Tok.is(tok::at))
+ return reportError(
+ First,
+ diag::err_dep_source_scanner_unexpected_tokens_in_directive_body);
}
pushDirective(Kind);
skipWhitespace(First, End);
@@ -846,6 +850,7 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) {
if (*First == '@')
return lexAt(First, End);
+ // Handle module directives for C++20 modules.
if (*First == 'i' || *First == 'e' || *First == 'm')
return lexModule(First, End);
diff --git a/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_import.m b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_import.m
new file mode 100644
index 0000000000000..eb963301807b5
--- /dev/null
+++ b/clang/test/Lexer/minimize_source_to_dependency_directives_invalid_import.m
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 -verify -print-dependency-directives-minimized-source %s 2>&1
+
+import <invalid.h>
+#import "invalid.h" // expected-error {{unexpected extra tokens inside declaration}}
+@;
+#pragma clang module import invalid
diff --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
index bdb5e23510118..90e37bba24ee7 100644
--- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
+++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
@@ -33,7 +33,7 @@ static bool minimizeSourceToDependencyDirectives(
return false;
}
-
+// Returns false on successful minimization.
static bool minimizeSourceToDependencyDirectives(StringRef Input,
SmallVectorImpl<char> &Out) {
SmallVector<dependency_directives_scan::Token, 16> Tokens;
@@ -677,13 +677,22 @@ TEST(MinimizeSourceToDependencyDirectivesTest, EmptyIncludesAndImports) {
Out.data());
}
-TEST(MinimizeSourceToDependencyDirectivesTest, AtImportFailures) {
+TEST(MinimizeSourceToDependencyDirectivesTest, ImportFailures) {
SmallVector<char, 128> Out;
- ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import A\n", Out));
+ // Minimization can recover, and let the real compilation fail.
ASSERT_FALSE(
minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out));
ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out));
+
+ ASSERT_FALSE(minimizeSourceToDependencyDirectives("import\n", Out));
+ EXPECT_STREQ("<TokBeforeEOF>\n", Out.data());
+
+ // Minimization cannot recover.
+ ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import A\n", Out));
+ ASSERT_TRUE(minimizeSourceToDependencyDirectives("import <Foo.h>\n"
+ "#include \"Foo.h\"",
+ Out));
}
TEST(MinimizeSourceToDependencyDirectivesTest, RawStringLiteral) {
``````````
</details>
https://github.com/llvm/llvm-project/pull/142452
More information about the cfe-commits
mailing list