[clang] Insert headers in global module fragment (PR #151624)
Mythreya Kuricheti via cfe-commits
cfe-commits at lists.llvm.org
Thu Jul 31 19:32:06 PDT 2025
https://github.com/MythreyaK updated https://github.com/llvm/llvm-project/pull/151624
>From 51db5932277a81581ba5076a7f79eb0b124c2b92 Mon Sep 17 00:00:00 2001
From: Mythreya Kuricheti <git at mythreya.dev>
Date: Thu, 31 Jul 2025 19:14:22 -0700
Subject: [PATCH] [libtooling] Insert headers in global module fragment
---
.../lib/Tooling/Inclusions/HeaderIncludes.cpp | 119 ++++++++++++------
.../unittests/Tooling/HeaderIncludesTest.cpp | 59 +++++++++
2 files changed, 143 insertions(+), 35 deletions(-)
diff --git a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
index 2b5a293b35841..e4e752a947c22 100644
--- a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
+++ b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
@@ -22,6 +22,8 @@ LangOptions createLangOpts() {
LangOpts.CPlusPlus = 1;
LangOpts.CPlusPlus11 = 1;
LangOpts.CPlusPlus14 = 1;
+ LangOpts.CPlusPlus20 = 1;
+ LangOpts.CPlusPlusModules = 1;
LangOpts.LineComment = 1;
LangOpts.CXXOperatorNames = 1;
LangOpts.Bool = 1;
@@ -68,53 +70,100 @@ bool checkAndConsumeDirectiveWithName(
return Matched;
}
+// Matches a preprocessor directive that takes no argument (e.g. "#endif"):
+// a '#' token immediately followed by the raw identifier \p Name. On a match,
+// \p Tok is advanced to the token after the directive name and true is
+// returned.
+// NOTE: the lexer is advanced while probing, so \p Tok may already have moved
+// past the '#' when this returns false.
+bool checkAndConsumeDirective(Lexer &Lex, StringRef Name, Token &Tok) {
+  bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
+                 Tok.is(tok::raw_identifier) &&
+                 Tok.getRawIdentifier() == Name;
+  // Step past the directive name only once the whole directive has matched;
+  // reaching the end of input here does not undo the match.
+  if (Matched)
+    Lex.LexFromRawLexer(Tok);
+  return Matched;
+}
+
+// Matches the global module fragment introducer "module;": the raw identifier
+// "module" immediately followed by ';'. On a match, \p Tok is advanced to the
+// token after the ';' and true is returned.
+// NOTE: the lexer is advanced while probing, so for input such as
+// "module foo;" the "module" token has already been consumed when this
+// returns false.
+bool checkAndConsumeModuleDecl(Lexer &Lex, Token &Tok) {
+  bool Matched = Tok.is(tok::raw_identifier) &&
+                 Tok.getRawIdentifier() == "module" &&
+                 !Lex.LexFromRawLexer(Tok) && Tok.is(tok::semi);
+  // Step past the ';' only once the whole declaration has matched; reaching
+  // the end of input here does not undo the match.
+  if (Matched)
+    Lex.LexFromRawLexer(Tok);
+  return Matched;
+}
+
+// Advances \p Tok past consecutive comment tokens. Stops early when
+// Lex.LexFromRawLexer(Tok) returns true (presumably end of input - confirm).
void skipComments(Lexer &Lex, Token &Tok) {
 while (Tok.is(tok::comment))
 if (Lex.LexFromRawLexer(Tok))
 return;
}
+// Returns the offset reached after skipping leading comments, "#pragma once" or "module;", and #ifndef blocks.
+unsigned skipToGlobalModuleFragmentAfterPP(StringRef FileName,
+ StringRef Code,
+ const IncludeStyle &Style) {
+ return getOffsetAfterTokenSequence(FileName, Code, Style,
+ [](const SourceManager& SM, Lexer& Lex, Token& Tok) -> unsigned {
+ // Skip all comments at the start of the file.
+ skipComments(Lex, Tok);
+ // A file that uses "#pragma once" is a header and must not contain "module;",
+ // so "module;" is only probed when "#pragma once" did not match.
+ if (!checkAndConsumeDirectiveWithName(Lex, "pragma", Tok,
+ StringRef("once"))) {
+ checkAndConsumeModuleDecl(Lex, Tok);
+ }
+ // Skip each "#ifndef X" / "#define X" / optional "#endif" group and the comments around it.
+ while(checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
+ skipComments(Lex, Tok);
+ if (checkAndConsumeDirectiveWithName(Lex, "define", Tok)) {
+ skipComments(Lex, Tok);
+ checkAndConsumeDirective(Lex, "endif", Tok);
+ }
+ skipComments(Lex, Tok);
+ }
+ // NOTE(review): a probe that fails part-way may already have advanced Tok; confirm this offset cannot land mid-construct.
+ return SM.getFileOffset(Tok.getLocation());
+ });
+}
+
// Returns the offset after header guard directives and any comments
// before/after header guards (e.g. #ifndef/#define pair, #pragma once). If no
// header guard is present in the code, this will return the offset after
-// skipping all comments from the start of the code.
-unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
+// skipping all comments from the start of the code. Also ensures that the
+// offset returned is always in the global module fragment, if modules are in
+// use.
+unsigned getMinHeaderInsertOffset(StringRef FileName,
 StringRef Code,
 const IncludeStyle &Style) {
- // \p Consume returns location after header guard or 0 if no header guard is
- // found.
- auto ConsumeHeaderGuardAndComment =
- [&](std::function<unsigned(const SourceManager &SM, Lexer &Lex,
- Token Tok)>
- Consume) {
- return getOffsetAfterTokenSequence(
- FileName, Code, Style,
- [&Consume](const SourceManager &SM, Lexer &Lex, Token Tok) {
- skipComments(Lex, Tok);
- unsigned InitialOffset = SM.getFileOffset(Tok.getLocation());
- return std::max(InitialOffset, Consume(SM, Lex, Tok));
- });
- };
- return std::max(
- // #ifndef/#define
- ConsumeHeaderGuardAndComment(
- [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
- if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
- skipComments(Lex, Tok);
- if (checkAndConsumeDirectiveWithName(Lex, "define", Tok) &&
- Tok.isAtStartOfLine())
- return SM.getFileOffset(Tok.getLocation());
- }
- return 0;
- }),
- // #pragma once
- ConsumeHeaderGuardAndComment(
- [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
- if (checkAndConsumeDirectiveWithName(Lex, "pragma", Tok,
- StringRef("once")))
- return SM.getFileOffset(Tok.getLocation());
- return 0;
- }));
+  // The comment, header-guard and module-fragment skipping is done by the
+  // token scan in skipToGlobalModuleFragmentAfterPP.
+  return skipToGlobalModuleFragmentAfterPP(FileName, Code, Style);
}
// Check if a sequence of tokens is like
@@ -281,7 +330,7 @@ HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code,
const IncludeStyle &Style)
: FileName(FileName), Code(Code), FirstIncludeOffset(-1),
MinInsertOffset(
- getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style)),
+ getMinHeaderInsertOffset(FileName, Code, Style)),
MaxInsertOffset(MinInsertOffset +
getMaxHeaderInsertionOffset(
FileName, Code.drop_front(MinInsertOffset), Style)),
diff --git a/clang/unittests/Tooling/HeaderIncludesTest.cpp b/clang/unittests/Tooling/HeaderIncludesTest.cpp
index 929156a11d0d9..c0f7f0f958898 100644
--- a/clang/unittests/Tooling/HeaderIncludesTest.cpp
+++ b/clang/unittests/Tooling/HeaderIncludesTest.cpp
@@ -594,6 +594,65 @@ TEST_F(HeaderIncludesTest, CanDeleteAfterCode) {
EXPECT_EQ(Expected, remove(Code, "\"b.h\""));
}
+TEST_F(HeaderIncludesTest, InsertInGlobalModuleFragment) {
+  // Ensure header insertions go only in the global module fragment, i.e.
+  // after "module;" and before the module declaration. The leading comments
+  // of the input must be left untouched.
+  std::string Code = R"cpp(// comments
+
+// more comments
+
+module;
+export module foo;
+
+int main() {
+ std::vector<int> ints {};
+})cpp";
+  std::string Expected = R"cpp(// comments
+
+// more comments
+
+module;
+#include <vector>
+export module foo;
+
+int main() {
+ std::vector<int> ints {};
+})cpp";
+
+  EXPECT_EQ(Expected, insert(Code, "<vector>"));
+}
+
+TEST_F(HeaderIncludesTest, InsertInGlobalModuleFragmentWithPP) {
+  // Ensure the insertion point lies after the leading comments and after the
+  // #ifndef/#define/#endif blocks (and the comments between them) that
+  // precede the first declaration.
+  std::string Code = R"cpp(// comments
+
+// more comments
+
+// some more comments
+
+#ifndef MACRO_NAME
+#define MACRO_NAME
+#endif
+
+// comment
+
+#ifndef MACRO_NAME
+#define MACRO_NAME
+#endif
+
+// more comment
+
+int main() {
+ std::vector<int> ints {};
+})cpp";
+  std::string Expected = R"cpp(// comments
+
+// more comments
+
+// some more comments
+
+#ifndef MACRO_NAME
+#define MACRO_NAME
+#endif
+
+// comment
+
+#ifndef MACRO_NAME
+#define MACRO_NAME
+#endif
+
+// more comment
+
+#include <vector>
+int main() {
+ std::vector<int> ints {};
+})cpp";
+
+  EXPECT_EQ(Expected, insert(Code, "<vector>"));
+}
+
} // namespace
} // namespace tooling
} // namespace clang
More information about the cfe-commits
mailing list