[clang] [Clang][Preprocessor] Unify header-name lookahead for import and include (PR #191004)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Apr 15 21:54:13 PDT 2026
https://github.com/yronglin updated https://github.com/llvm/llvm-project/pull/191004
>From 373880aed203efd8521dfb76a3f52fedee2592dc Mon Sep 17 00:00:00 2001
From: yronglin <yronglin777 at gmail.com>
Date: Thu, 9 Apr 2026 00:19:55 +0800
Subject: [PATCH 1/7] [C++][Modules][Preprocessor] Clang should not convert a
import preprocessing token to contextual keyword if a digraph character
following import
Signed-off-by: yronglin <yronglin777 at gmail.com>
---
clang/docs/ReleaseNotes.rst | 1 +
clang/include/clang/Lex/Lexer.h | 4 ++++
clang/lib/Lex/Lexer.cpp | 26 ++++++++++++++--------
clang/lib/Lex/Preprocessor.cpp | 33 +++++++++++++++++++++++-----
clang/test/CXX/module/cpp.pre/p1.cpp | 26 ++++++++++++++++++++++
5 files changed, 75 insertions(+), 15 deletions(-)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 2da7175b51ea3..9c0155265874b 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -406,6 +406,7 @@ Bug Fixes in This Version
- Fixed a crash on _BitInt(N) arrays where 129 ≤ N ≤ 192 due to incorrect array filler lowering. (#GH189643)
- Fixed the behavior in C23 of ``auto``, by emitting an error when an array type is specified for a ``char *``. (#GH162694)
- Fixed incorrect rejection of ``auto`` with reordered declaration specifiers in C23. (#GH164121)
+- Fixed incorrect handling of C++ import preprocessing token when a digraph character after import. (#GH190693)
Bug Fixes to Compiler Builtins
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
index 0459a863bc08d..8e4cc7a95b327 100644
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -732,6 +732,10 @@ class Lexer : public PreprocessorLexer {
/// otherwise return P.
static const char *SkipEscapedNewLines(const char *P);
+ /// SkipHorizontalWhitespace - Skip the horizontak whitespace characters and
+ /// returns the advanced pointer.
+ static const char *SkipHorizontalWhitespace(const char *Ptr);
+
/// getCharAndSizeSlowNoWarn - Same as getCharAndSizeSlow, but never emits a
/// diagnostic.
static SizedChar getCharAndSizeSlowNoWarn(const char *Ptr,
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 10246552bb13d..29caeb943e3df 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -1336,6 +1336,18 @@ const char *Lexer::SkipEscapedNewLines(const char *P) {
}
}
+const char *Lexer::SkipHorizontalWhitespace(const char *Ptr) {
+ // Small amounts of horizontal whitespace is very common between tokens.
+ // Check for space character separately to skip the expensive
+ // isHorizontalWhitespace() check
+ if (*Ptr == ' ' || isHorizontalWhitespace(*Ptr)) {
+ do {
+ ++Ptr;
+ } while (*Ptr == ' ' || isHorizontalWhitespace(*Ptr));
+ }
+ return Ptr;
+}
+
std::optional<Token> Lexer::findNextToken(SourceLocation Loc,
const SourceManager &SM,
const LangOptions &LangOpts,
@@ -3764,16 +3776,12 @@ bool Lexer::LexTokenInternal(Token &Result) {
assert(!Result.hasPtrData() && "Result has not been reset");
// CurPtr - Cache BufferPtr in an automatic variable.
- const char *CurPtr = BufferPtr;
-
- // Small amounts of horizontal whitespace is very common between tokens.
- // Check for space character separately to skip the expensive
- // isHorizontalWhitespace() check
- if (*CurPtr == ' ' || isHorizontalWhitespace(*CurPtr)) {
- do {
- ++CurPtr;
- } while (*CurPtr == ' ' || isHorizontalWhitespace(*CurPtr));
+ const char *CurPtr = SkipHorizontalWhitespace(BufferPtr);
+ /// CurPtr has been advanced forward, indicating that a horizontal whitespace
+ /// character has been encountered. Check if the Lexer is in keep whitespace
+ /// mode.
+ if (CurPtr != BufferPtr) {
// If we are keeping whitespace and other tokens, just return what we just
// skipped. The next lexer invocation will return the token after the
// whitespace.
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index c430da67c1469..4130e64be855e 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -1380,13 +1380,34 @@ bool Preprocessor::HandleModuleContextualKeyword(Token &Result) {
llvm::SaveAndRestore<bool> SavedParsingPreprocessorDirective(
CurPPLexer->ParsingPreprocessorDirective, true);
- // The next token may be an angled string literal after import keyword.
- llvm::SaveAndRestore<bool> SavedParsingFilemame(
- CurPPLexer->ParsingFilename,
- Result.getIdentifierInfo()->isImportKeyword());
+ bool ParsingFilename = false;
+ if (Result.getIdentifierInfo()->isImportKeyword()) {
+ if (getLangOpts().Digraphs && CurLexer &&
+ CurLexer->getCurrentBufferOffset() + 2 < CurLexer->getBuffer().size()) {
+ // If the import preprocessing token folled by a digraph character '<:',
+ // the import preprocessing should not traited as a import contextual
+ // keyword. Eg.
+ // int
+ // import <:10
+ // :>;
+ //
+ // This is a array definition, and equivalent to:
+ //
+ // int import[10];
+ const char *CurPtr = CurLexer->getBufferLocation();
+ CurPtr = Lexer::SkipHorizontalWhitespace(CurPtr);
+ auto C0 = Lexer::getCharAndSizeNoWarn(CurPtr, getLangOpts());
+ auto C1 = Lexer::getCharAndSizeNoWarn(CurPtr + C0.Size, getLangOpts());
+ if (C0.Char == '<' && (C1.Char == ':' || C1.Char == '%'))
+ return false;
+ }
+ ParsingFilename = true;
+ }
- std::optional<Token> NextTok =
- CurLexer ? CurLexer->peekNextPPToken() : CurTokenLexer->peekNextPPToken();
+ // The next token may be an angled string literal after import keyword.
+ llvm::SaveAndRestore<bool> SavedParsingFilemame(CurPPLexer->ParsingFilename,
+ ParsingFilename);
+ std::optional<Token> NextTok = peekNextPPToken();
if (!NextTok)
return false;
diff --git a/clang/test/CXX/module/cpp.pre/p1.cpp b/clang/test/CXX/module/cpp.pre/p1.cpp
index 989915004ff57..0e2fb65390e99 100644
--- a/clang/test/CXX/module/cpp.pre/p1.cpp
+++ b/clang/test/CXX/module/cpp.pre/p1.cpp
@@ -38,6 +38,8 @@
// RUN: %clang_cc1 -std=c++20 %t/func_like_macro.cpp -D'm(x)=x' -fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/lparen.cpp -D'm(x)=x' -D'LPAREN=(' -fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/control_line.cpp -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/digraph.cpp -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/digraph2.cpp -fsyntax-only -verify
//--- hash.cpp
@@ -205,3 +207,27 @@ export module m; // expected-error {{module directive lines are not allowed on l
// expected-error {{module declaration must occur at the start of the translation unit}} \
// expected-note@#1 {{add 'module;'}}
#endif
+
+//--- digraph.cpp
+// expected-no-diagnostics
+int
+import <:10
+:>;
+
+void foo() {
+ for (int i = 0; i < 10; ++i)
+ import[i] = i;
+}
+
+//--- digraph2.cpp
+// expected-no-diagnostics
+using import = int;
+
+void bar(int);
+
+void foo(int val =
+import <%%>
+) {
+ bar(val);
+}
+
>From 0c0d98300b68c2237876aef08150b0f160b08470 Mon Sep 17 00:00:00 2001
From: yronglin <yronglin777 at gmail.com>
Date: Thu, 9 Apr 2026 21:54:18 +0800
Subject: [PATCH 2/7] Revert "[C++][Modules][Preprocessor] Clang should not
convert a import preprocessing token to contextual keyword if a digraph
character following import"
This reverts commit 373880aed203efd8521dfb76a3f52fedee2592dc.
---
clang/docs/ReleaseNotes.rst | 1 -
clang/include/clang/Lex/Lexer.h | 4 ----
clang/lib/Lex/Lexer.cpp | 26 ++++++++--------------
clang/lib/Lex/Preprocessor.cpp | 33 +++++-----------------------
clang/test/CXX/module/cpp.pre/p1.cpp | 26 ----------------------
5 files changed, 15 insertions(+), 75 deletions(-)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 9c0155265874b..2da7175b51ea3 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -406,7 +406,6 @@ Bug Fixes in This Version
- Fixed a crash on _BitInt(N) arrays where 129 ≤ N ≤ 192 due to incorrect array filler lowering. (#GH189643)
- Fixed the behavior in C23 of ``auto``, by emitting an error when an array type is specified for a ``char *``. (#GH162694)
- Fixed incorrect rejection of ``auto`` with reordered declaration specifiers in C23. (#GH164121)
-- Fixed incorrect handling of C++ import preprocessing token when a digraph character after import. (#GH190693)
Bug Fixes to Compiler Builtins
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
index 8e4cc7a95b327..0459a863bc08d 100644
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -732,10 +732,6 @@ class Lexer : public PreprocessorLexer {
/// otherwise return P.
static const char *SkipEscapedNewLines(const char *P);
- /// SkipHorizontalWhitespace - Skip the horizontak whitespace characters and
- /// returns the advanced pointer.
- static const char *SkipHorizontalWhitespace(const char *Ptr);
-
/// getCharAndSizeSlowNoWarn - Same as getCharAndSizeSlow, but never emits a
/// diagnostic.
static SizedChar getCharAndSizeSlowNoWarn(const char *Ptr,
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 29caeb943e3df..10246552bb13d 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -1336,18 +1336,6 @@ const char *Lexer::SkipEscapedNewLines(const char *P) {
}
}
-const char *Lexer::SkipHorizontalWhitespace(const char *Ptr) {
- // Small amounts of horizontal whitespace is very common between tokens.
- // Check for space character separately to skip the expensive
- // isHorizontalWhitespace() check
- if (*Ptr == ' ' || isHorizontalWhitespace(*Ptr)) {
- do {
- ++Ptr;
- } while (*Ptr == ' ' || isHorizontalWhitespace(*Ptr));
- }
- return Ptr;
-}
-
std::optional<Token> Lexer::findNextToken(SourceLocation Loc,
const SourceManager &SM,
const LangOptions &LangOpts,
@@ -3776,12 +3764,16 @@ bool Lexer::LexTokenInternal(Token &Result) {
assert(!Result.hasPtrData() && "Result has not been reset");
// CurPtr - Cache BufferPtr in an automatic variable.
- const char *CurPtr = SkipHorizontalWhitespace(BufferPtr);
+ const char *CurPtr = BufferPtr;
+
+ // Small amounts of horizontal whitespace is very common between tokens.
+ // Check for space character separately to skip the expensive
+ // isHorizontalWhitespace() check
+ if (*CurPtr == ' ' || isHorizontalWhitespace(*CurPtr)) {
+ do {
+ ++CurPtr;
+ } while (*CurPtr == ' ' || isHorizontalWhitespace(*CurPtr));
- /// CurPtr has been advanced forward, indicating that a horizontal whitespace
- /// character has been encountered. Check if the Lexer is in keep whitespace
- /// mode.
- if (CurPtr != BufferPtr) {
// If we are keeping whitespace and other tokens, just return what we just
// skipped. The next lexer invocation will return the token after the
// whitespace.
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 4130e64be855e..c430da67c1469 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -1380,34 +1380,13 @@ bool Preprocessor::HandleModuleContextualKeyword(Token &Result) {
llvm::SaveAndRestore<bool> SavedParsingPreprocessorDirective(
CurPPLexer->ParsingPreprocessorDirective, true);
- bool ParsingFilename = false;
- if (Result.getIdentifierInfo()->isImportKeyword()) {
- if (getLangOpts().Digraphs && CurLexer &&
- CurLexer->getCurrentBufferOffset() + 2 < CurLexer->getBuffer().size()) {
- // If the import preprocessing token folled by a digraph character '<:',
- // the import preprocessing should not traited as a import contextual
- // keyword. Eg.
- // int
- // import <:10
- // :>;
- //
- // This is a array definition, and equivalent to:
- //
- // int import[10];
- const char *CurPtr = CurLexer->getBufferLocation();
- CurPtr = Lexer::SkipHorizontalWhitespace(CurPtr);
- auto C0 = Lexer::getCharAndSizeNoWarn(CurPtr, getLangOpts());
- auto C1 = Lexer::getCharAndSizeNoWarn(CurPtr + C0.Size, getLangOpts());
- if (C0.Char == '<' && (C1.Char == ':' || C1.Char == '%'))
- return false;
- }
- ParsingFilename = true;
- }
-
// The next token may be an angled string literal after import keyword.
- llvm::SaveAndRestore<bool> SavedParsingFilemame(CurPPLexer->ParsingFilename,
- ParsingFilename);
- std::optional<Token> NextTok = peekNextPPToken();
+ llvm::SaveAndRestore<bool> SavedParsingFilemame(
+ CurPPLexer->ParsingFilename,
+ Result.getIdentifierInfo()->isImportKeyword());
+
+ std::optional<Token> NextTok =
+ CurLexer ? CurLexer->peekNextPPToken() : CurTokenLexer->peekNextPPToken();
if (!NextTok)
return false;
diff --git a/clang/test/CXX/module/cpp.pre/p1.cpp b/clang/test/CXX/module/cpp.pre/p1.cpp
index 0e2fb65390e99..989915004ff57 100644
--- a/clang/test/CXX/module/cpp.pre/p1.cpp
+++ b/clang/test/CXX/module/cpp.pre/p1.cpp
@@ -38,8 +38,6 @@
// RUN: %clang_cc1 -std=c++20 %t/func_like_macro.cpp -D'm(x)=x' -fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/lparen.cpp -D'm(x)=x' -D'LPAREN=(' -fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/control_line.cpp -fsyntax-only -verify
-// RUN: %clang_cc1 -std=c++20 %t/digraph.cpp -fsyntax-only -verify
-// RUN: %clang_cc1 -std=c++20 %t/digraph2.cpp -fsyntax-only -verify
//--- hash.cpp
@@ -207,27 +205,3 @@ export module m; // expected-error {{module directive lines are not allowed on l
// expected-error {{module declaration must occur at the start of the translation unit}} \
// expected-note@#1 {{add 'module;'}}
#endif
-
-//--- digraph.cpp
-// expected-no-diagnostics
-int
-import <:10
-:>;
-
-void foo() {
- for (int i = 0; i < 10; ++i)
- import[i] = i;
-}
-
-//--- digraph2.cpp
-// expected-no-diagnostics
-using import = int;
-
-void bar(int);
-
-void foo(int val =
-import <%%>
-) {
- bar(val);
-}
-
>From f348770552e584bcc240c8f7136eabfbc1184f93 Mon Sep 17 00:00:00 2001
From: yronglin <yronglin777 at gmail.com>
Date: Thu, 9 Apr 2026 23:53:22 +0800
Subject: [PATCH 3/7] [C++][Modules] Don't check '<' after 'import' when
converting import pp-token to contextual keyword
Signed-off-by: yronglin <yronglin777 at gmail.com>
---
clang/docs/ReleaseNotes.rst | 1 +
clang/lib/Lex/Preprocessor.cpp | 2 +-
clang/lib/Parse/Parser.cpp | 10 ++++++++
clang/test/CXX/module/cpp.pre/p1.cpp | 36 +++++++++++++++++++++++++++-
4 files changed, 47 insertions(+), 2 deletions(-)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 2da7175b51ea3..9c0155265874b 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -406,6 +406,7 @@ Bug Fixes in This Version
- Fixed a crash on _BitInt(N) arrays where 129 ≤ N ≤ 192 due to incorrect array filler lowering. (#GH189643)
- Fixed the behavior in C23 of ``auto``, by emitting an error when an array type is specified for a ``char *``. (#GH162694)
- Fixed incorrect rejection of ``auto`` with reordered declaration specifiers in C23. (#GH164121)
+- Fixed incorrect handling of C++ import preprocessing token when a digraph character after import. (#GH190693)
Bug Fixes to Compiler Builtins
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index c430da67c1469..9b21777965ed8 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -1394,7 +1394,7 @@ bool Preprocessor::HandleModuleContextualKeyword(Token &Result) {
LookUpIdentifierInfo(*NextTok);
if (Result.getIdentifierInfo()->isImportKeyword()) {
- if (NextTok->isOneOf(tok::identifier, tok::less, tok::colon,
+ if (NextTok->isOneOf(tok::identifier, tok::colon,
tok::header_name)) {
Result.setKind(tok::kw_import);
ModuleImportLoc = Result.getLocation();
diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp
index c4f745612e06c..3e57330e9e09a 100644
--- a/clang/lib/Parse/Parser.cpp
+++ b/clang/lib/Parse/Parser.cpp
@@ -2495,6 +2495,16 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc,
break;
}
+ // FIXME: If the previous token is tok::header_name like the following:
+ //
+ // import <%%>
+ //
+ // The diagnostic location is incorrect.
+ //
+ // <source file>:1:10: error: import directive must end with a ';'
+ // 1 | import <%%>
+ // | ^
+ // | ;
bool LexedSemi = false;
if (getLangOpts().CPlusPlusModules)
LexedSemi =
diff --git a/clang/test/CXX/module/cpp.pre/p1.cpp b/clang/test/CXX/module/cpp.pre/p1.cpp
index 989915004ff57..d0cf0ee8efe1a 100644
--- a/clang/test/CXX/module/cpp.pre/p1.cpp
+++ b/clang/test/CXX/module/cpp.pre/p1.cpp
@@ -38,7 +38,10 @@
// RUN: %clang_cc1 -std=c++20 %t/func_like_macro.cpp -D'm(x)=x' -fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/lparen.cpp -D'm(x)=x' -D'LPAREN=(' -fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/control_line.cpp -fsyntax-only -verify
-
+// RUN: %clang_cc1 -std=c++20 %t/digraph.cpp -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/digraph2.cpp -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/digraph3.cpp -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/digraph4.cpp -fsyntax-only -verify
//--- hash.cpp
// expected-no-diagnostics
@@ -205,3 +208,34 @@ export module m; // expected-error {{module directive lines are not allowed on l
// expected-error {{module declaration must occur at the start of the translation unit}} \
// expected-note@#1 {{add 'module;'}}
#endif
+
+//--- digraph.cpp
+// expected-no-diagnostics
+int
+import <:10
+:>;
+
+void foo() {
+ for (int i = 0; i < 10; ++i)
+ import[i] = i;
+}
+
+//--- digraph2.cpp
+// expected-no-diagnostics
+using import = int;
+
+void bar(int);
+
+void foo(int val =
+import <%
+%>
+) {
+ bar(val);
+}
+
+//--- digraph3.cpp
+import <%%>; // expected-error {{'%%' file not found}}
+
+//--- digraph4.cpp
+import <::>; // expected-error {{'::' file not found}}
+
>From ac0b845d14d053c031f1d162d28ba085154b5fc8 Mon Sep 17 00:00:00 2001
From: yronglin <yronglin777 at gmail.com>
Date: Sun, 12 Apr 2026 02:45:48 +0800
Subject: [PATCH 4/7] [Clang][Preprocessor] Unify header-name lookahead for
import and include
Introduce Preprocessor::isNextPPTokenHeaderNameOrOneOf to centralize
lookahead logic for header-name formation and token classification
under ParsingFilename mode.
Refactor handling of C++20 module/import contextual keywords and
LexHeaderName to use the new helper, ensuring consistent behavior
between `import` and `#include`.
This fixes incorrect acceptance of cases where macro expansion after
a digraph-like `<:` leads to invalid header-name parsing, e.g.:
#define FOO foo>
#include <:FOO
Now such cases are rejected as expected.
Also adjusts peekNextPPToken to properly support dependency directive
lexers.
No functional change intended for valid code; improves correctness and
consistency in edge cases involving header-name lexing.
Signed-off-by: yronglin <yronglin777 at gmail.com>
---
clang/docs/ReleaseNotes.rst | 2 +-
clang/include/clang/Lex/Preprocessor.h | 23 +++++++++++++-
clang/lib/Lex/Lexer.cpp | 18 +++++------
clang/lib/Lex/Preprocessor.cpp | 44 +++++++++++---------------
clang/test/CXX/cpp/cpp.include/p3.cpp | 5 +++
clang/test/CXX/module/cpp.pre/p1.cpp | 33 +++++++++++++------
6 files changed, 79 insertions(+), 46 deletions(-)
create mode 100644 clang/test/CXX/cpp/cpp.include/p3.cpp
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 9c0155265874b..dc246af32a70c 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -406,7 +406,7 @@ Bug Fixes in This Version
- Fixed a crash on _BitInt(N) arrays where 129 ≤ N ≤ 192 due to incorrect array filler lowering. (#GH189643)
- Fixed the behavior in C23 of ``auto``, by emitting an error when an array type is specified for a ``char *``. (#GH162694)
- Fixed incorrect rejection of ``auto`` with reordered declaration specifiers in C23. (#GH164121)
-- Fixed incorrect handling of C++ import preprocessing token when a digraph character after import. (#GH190693)
+- Fixed incorrect handling of header-name lookahead in C++ import and #include directives involving digraphs and macro expansion. (#GH190693)
Bug Fixes to Compiler Builtins
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index c7e152a75f51f..bb34f00360041 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -48,6 +48,7 @@
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Registry.h"
+#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/TrailingObjects.h"
#include <cassert>
#include <cstddef>
@@ -1842,6 +1843,26 @@ class Preprocessor {
void HandleCXXImportDirective(Token Import);
void HandleCXXModuleDirective(Token Module);
+ template <typename... Ts> bool isNextPPTokenHeaderNameOrOneOf(Ts... Ks) {
+ // First, tries to form a valid header-name token.
+ llvm::SaveAndRestore<bool> SavedFilename(CurPPLexer->ParsingFilename,
+ true);
+ if (auto Tok = peekNextPPToken()) {
+ if (Tok->is(tok::header_name))
+ return true;
+ }
+
+ // If that fails and it's not one of the other tokens, then it's not a
+ // directive.
+ CurPPLexer->ParsingFilename = false;
+ if (auto NextTok = peekNextPPToken()) {
+ if (NextTok->is(tok::raw_identifier))
+ LookUpIdentifierInfo(*NextTok);
+ return NextTok->isOneOf(Ks...);
+ }
+ return false;
+ }
+
/// Callback invoked when the lexer sees one of export, import or module token
/// at the start of a line.
///
@@ -2393,12 +2414,12 @@ class Preprocessor {
return NextTokOpt.has_value() ? NextTokOpt->is(Ks...) : false;
}
-private:
/// peekNextPPToken - Return std::nullopt if there are no more tokens in the
/// buffer controlled by this lexer, otherwise return the next unexpanded
/// token.
std::optional<Token> peekNextPPToken() const;
+private:
/// Identifiers used for SEH handling in Borland. These are only
/// allowed in particular circumstances
// __except block
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 10246552bb13d..2982788c7de23 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -3219,15 +3219,6 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
std::optional<Token> Lexer::peekNextPPToken() {
assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?");
- if (isDependencyDirectivesLexer()) {
- if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
- return std::nullopt;
- Token Result;
- (void)convertDependencyDirectiveToken(
- DepDirectives.front().Tokens[NextDepDirectiveTokenIndex], Result);
- return Result;
- }
-
// Switch to 'skipping' mode. This will ensure that we can lex a token
// without emitting diagnostics, disables macro expansion, and will cause EOF
// to return an EOF token instead of popping the include stack.
@@ -3242,7 +3233,14 @@ std::optional<Token> Lexer::peekNextPPToken() {
MultipleIncludeOpt MIOptState = MIOpt;
Token Tok;
- Lex(Tok);
+ if (isDependencyDirectivesLexer()) {
+ if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
+ return std::nullopt;
+ (void)convertDependencyDirectiveToken(
+ DepDirectives.front().Tokens[NextDepDirectiveTokenIndex], Tok);
+ } else {
+ Lex(Tok);
+ }
// Restore state that may have changed.
BufferPtr = TmpBufferPtr;
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 9b21777965ed8..ad7f9683dacaf 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -1098,8 +1098,11 @@ bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
// __has_include(__has_include))
if (CurPPLexer->ParsingFilename)
LexUnexpandedToken(FilenameTok);
- else
+ else if ((getLangOpts().CPlusPlusModules && isImportingCXXNamedModules()) ||
+ isNextPPTokenHeaderNameOrOneOf(tok::less))
CurPPLexer->LexIncludeFilename(FilenameTok);
+ else
+ Lex(FilenameTok);
} else {
Lex(FilenameTok);
}
@@ -1380,33 +1383,24 @@ bool Preprocessor::HandleModuleContextualKeyword(Token &Result) {
llvm::SaveAndRestore<bool> SavedParsingPreprocessorDirective(
CurPPLexer->ParsingPreprocessorDirective, true);
- // The next token may be an angled string literal after import keyword.
- llvm::SaveAndRestore<bool> SavedParsingFilemame(
- CurPPLexer->ParsingFilename,
- Result.getIdentifierInfo()->isImportKeyword());
-
- std::optional<Token> NextTok =
- CurLexer ? CurLexer->peekNextPPToken() : CurTokenLexer->peekNextPPToken();
- if (!NextTok)
- return false;
-
- if (NextTok->is(tok::raw_identifier))
- LookUpIdentifierInfo(*NextTok);
-
- if (Result.getIdentifierInfo()->isImportKeyword()) {
- if (NextTok->isOneOf(tok::identifier, tok::colon,
- tok::header_name)) {
- Result.setKind(tok::kw_import);
- ModuleImportLoc = Result.getLocation();
- IsAtImport = false;
- return true;
+ if (II->isModuleKeyword()) {
+ if (auto NextTok = peekNextPPToken()) {
+ if (NextTok->is(tok::raw_identifier))
+ LookUpIdentifierInfo(*NextTok);
+ if (NextTok->isOneOf(tok::identifier, tok::colon, tok::semi)) {
+ Result.setKind(tok::kw_module);
+ ModuleDeclLoc = Result.getLocation();
+ return true;
+ }
}
+ return false;
}
- if (Result.getIdentifierInfo()->isModuleKeyword() &&
- NextTok->isOneOf(tok::identifier, tok::colon, tok::semi)) {
- Result.setKind(tok::kw_module);
- ModuleDeclLoc = Result.getLocation();
+ if (II->isImportKeyword() &&
+ isNextPPTokenHeaderNameOrOneOf(tok::identifier, tok::colon, tok::less)) {
+ Result.setKind(tok::kw_import);
+ ModuleImportLoc = Result.getLocation();
+ IsAtImport = false;
return true;
}
diff --git a/clang/test/CXX/cpp/cpp.include/p3.cpp b/clang/test/CXX/cpp/cpp.include/p3.cpp
new file mode 100644
index 0000000000000..7afb4af1c9423
--- /dev/null
+++ b/clang/test/CXX/cpp/cpp.include/p3.cpp
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 %s -fsyntax-only -verify
+
+#define FOO foo>
+#include <:FOO
+// expected-error@-1 {{expected "FILENAME" or <FILENAME>}}
diff --git a/clang/test/CXX/module/cpp.pre/p1.cpp b/clang/test/CXX/module/cpp.pre/p1.cpp
index d0cf0ee8efe1a..5b6f225f2f58c 100644
--- a/clang/test/CXX/module/cpp.pre/p1.cpp
+++ b/clang/test/CXX/module/cpp.pre/p1.cpp
@@ -38,11 +38,12 @@
// RUN: %clang_cc1 -std=c++20 %t/func_like_macro.cpp -D'm(x)=x' -fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/lparen.cpp -D'm(x)=x' -D'LPAREN=(' -fsyntax-only -verify
// RUN: %clang_cc1 -std=c++20 %t/control_line.cpp -fsyntax-only -verify
-// RUN: %clang_cc1 -std=c++20 %t/digraph.cpp -fsyntax-only -verify
-// RUN: %clang_cc1 -std=c++20 %t/digraph2.cpp -fsyntax-only -verify
-// RUN: %clang_cc1 -std=c++20 %t/digraph3.cpp -fsyntax-only -verify
-// RUN: %clang_cc1 -std=c++20 %t/digraph4.cpp -fsyntax-only -verify
-
+// RUN: %clang_cc1 -std=c++20 %t/header_name1.cpp -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/header_name2.cpp -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/header_name3.cpp -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/header_name4.cpp -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/header_name5.cpp -fsyntax-only -verify
+// RUN: %clang_cc1 -std=c++20 %t/header_name6.cpp -fsyntax-only -verify
//--- hash.cpp
// expected-no-diagnostics
# // preprocessing directive
@@ -209,7 +210,7 @@ export module m; // expected-error {{module directive lines are not allowed on l
// expected-note@#1 {{add 'module;'}}
#endif
-//--- digraph.cpp
+//--- header_name1.cpp
// expected-no-diagnostics
int
import <:10
@@ -220,7 +221,7 @@ void foo() {
import[i] = i;
}
-//--- digraph2.cpp
+//--- header_name2.cpp
// expected-no-diagnostics
using import = int;
@@ -233,9 +234,23 @@ import <%
bar(val);
}
-//--- digraph3.cpp
+//--- header_name3.cpp
+export module M;
import <%%>; // expected-error {{'%%' file not found}}
-//--- digraph4.cpp
+//--- header_name4.cpp
+export module M;
import <::>; // expected-error {{'::' file not found}}
+//--- header_name5.cpp
+export module M;
+#define FOO foo>;
+import <:FOO
+// expected-error@-1 {{use of undeclared identifier 'foo'}}
+// expected-error@-2 {{a type specifier is required for all declarations}}
+// expected-error@-3 {{expected expression}}
+
+//--- header_name6.cpp
+export module M;
+#define HEADER vector>
+import <HEADER; // expected-error {{file not found}}
>From 54449521a68dc686210d9dac49f7f87c8155cc57 Mon Sep 17 00:00:00 2001
From: yronglin <yronglin777 at gmail.com>
Date: Sun, 12 Apr 2026 02:56:10 +0800
Subject: [PATCH 5/7] Format
Signed-off-by: yronglin <yronglin777 at gmail.com>
---
clang/include/clang/Lex/Preprocessor.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index bb34f00360041..34bf1c2f0fec7 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -1845,8 +1845,8 @@ class Preprocessor {
template <typename... Ts> bool isNextPPTokenHeaderNameOrOneOf(Ts... Ks) {
// First, tries to form a valid header-name token.
- llvm::SaveAndRestore<bool> SavedFilename(CurPPLexer->ParsingFilename,
- true);
+ llvm::SaveAndRestore<bool> SavedParsingFilename(CurPPLexer->ParsingFilename,
+ true);
if (auto Tok = peekNextPPToken()) {
if (Tok->is(tok::header_name))
return true;
>From 3500dda26d523f9a28215d447a99353864c4bd3e Mon Sep 17 00:00:00 2001
From: yronglin <yronglin777 at gmail.com>
Date: Mon, 13 Apr 2026 01:09:45 +0800
Subject: [PATCH 6/7] Avoid 2nd peekNextPPToken call
Signed-off-by: yronglin <yronglin777 at gmail.com>
---
clang/include/clang/Lex/Preprocessor.h | 75 ++++++++++++++++++--------
1 file changed, 54 insertions(+), 21 deletions(-)
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 34bf1c2f0fec7..ec29209fc1836 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -1843,26 +1843,6 @@ class Preprocessor {
void HandleCXXImportDirective(Token Import);
void HandleCXXModuleDirective(Token Module);
- template <typename... Ts> bool isNextPPTokenHeaderNameOrOneOf(Ts... Ks) {
- // First, tries to form a valid header-name token.
- llvm::SaveAndRestore<bool> SavedParsingFilename(CurPPLexer->ParsingFilename,
- true);
- if (auto Tok = peekNextPPToken()) {
- if (Tok->is(tok::header_name))
- return true;
- }
-
- // If that fails and it's not one of the other tokens, then it's not a
- // directive.
- CurPPLexer->ParsingFilename = false;
- if (auto NextTok = peekNextPPToken()) {
- if (NextTok->is(tok::raw_identifier))
- LookUpIdentifierInfo(*NextTok);
- return NextTok->isOneOf(Ks...);
- }
- return false;
- }
-
/// Callback invoked when the lexer sees one of export, import or module token
/// at the start of a line.
///
@@ -2414,12 +2394,65 @@ class Preprocessor {
return NextTokOpt.has_value() ? NextTokOpt->is(Ks...) : false;
}
+private:
/// peekNextPPToken - Return std::nullopt if there are no more tokens in the
/// buffer controlled by this lexer, otherwise return the next unexpanded
/// token.
std::optional<Token> peekNextPPToken() const;
-private:
+ /// Check whether the next preprocessing token can form a header-name token
+ /// or matches one of the specified token kinds.
+ ///
+ /// This performs a lookahead without consuming tokens:
+ /// - First, it temporarily enables `ParsingFilename` to attempt forming a
+ /// `tok::header_name` (e.g. `<foo>` or "foo").
+ /// - If that succeeds, returns true.
+ /// - Otherwise, it restores normal lexing mode and checks whether the next
+ /// token matches any of the provided kinds `Ks...`.
+ ///
+ /// This helper is used to classify tokens in contexts such as C++20 `import`
+ /// and `#include`, ensuring consistent handling of header-name lexing and
+ /// avoiding unintended lexer state changes.
+ template <typename... Ts> bool isNextPPTokenHeaderNameOrOneOf(Ts... Ks) {
+ // First, try to form a valid header-name token.
+ llvm::SaveAndRestore<bool> SavedParsingFilename(CurPPLexer->ParsingFilename,
+ true);
+ if (auto NextTok = peekNextPPToken()) {
+ if (NextTok->is(tok::header_name))
+ return true;
+
+ // In ParsingFilename mode, both <...> and "..." are lexed as header-name
+ // tokens. If a valid header-name is formed, return immediately.
+ //
+ // Otherwise, we may need to re-lex the token in normal mode. This is
+ // required for '<' to correctly handle cases such as digraphs ('<:',
+ // '<%') and situations where macro expansion affects token boundaries,
+ // e.g.:
+ //
+ // #define VECTOR vector>
+ // #include <VECTOR
+ //
+ // In such cases, the initial lex in ParsingFilename mode may fail to form
+ // a header-name, and only normal lexing yields the correct tokenization.
+ // For all other tokens, the result is identical between the two modes, so
+ // we can classify them directly and avoid calling the relatively
+ // expensive second peekNextPPToken() on the common path.
+ if (NextTok->isNot(tok::less)) {
+ if (NextTok->is(tok::raw_identifier))
+ LookUpIdentifierInfo(*NextTok);
+ return NextTok->isOneOf(Ks...);
+ }
+ }
+
+ // Reached only if the filename-mode peek yielded '<' or no token at all:
+ // re-lex in normal mode and test the result against the requested kinds.
+ CurPPLexer->ParsingFilename = false;
+ if (auto NextTok = peekNextPPToken()) {
+ return NextTok->isOneOf(Ks...);
+ }
+ return false;
+ }
+
/// Identifiers used for SEH handling in Borland. These are only
/// allowed in particular circumstances
// __except block
>From 536afcf6703887461fe4160e6e671969e0747bbe Mon Sep 17 00:00:00 2001
From: yronglin <yronglin777 at gmail.com>
Date: Mon, 13 Apr 2026 01:20:04 +0800
Subject: [PATCH 7/7] Add comments in LexHeaderName
Signed-off-by: yronglin <yronglin777 at gmail.com>
---
clang/lib/Lex/Preprocessor.cpp | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index ad7f9683dacaf..b74dd59bdb021 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -1098,7 +1098,11 @@ bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
// __has_include(__has_include))
if (CurPPLexer->ParsingFilename)
LexUnexpandedToken(FilenameTok);
- else if ((getLangOpts().CPlusPlusModules && isImportingCXXNamedModules()) ||
+ else if ((getLangOpts().CPlusPlusModules &&
+ isImportingCXXNamedModules()) || // C++ import already checked in
+ // HandleModuleContextualKeyword,
+ // avoid duplicate check in
+ // LexHeaderName.
isNextPPTokenHeaderNameOrOneOf(tok::less))
CurPPLexer->LexIncludeFilename(FilenameTok);
else
More information about the cfe-commits
mailing list