[clang] Insert headers in global module fragment (PR #151624)
Mythreya Kuricheti via cfe-commits
cfe-commits at lists.llvm.org
Sat Aug 2 22:12:51 PDT 2025
https://github.com/MythreyaK updated https://github.com/llvm/llvm-project/pull/151624
>From f7e79ed934bcb05548b6a4d303d6ed6284648f35 Mon Sep 17 00:00:00 2001
From: Mythreya Kuricheti <git at mythreya.dev>
Date: Fri, 1 Aug 2025 01:19:18 -0700
Subject: [PATCH 1/3] [libtooling] Insert headers in global module fragment
---
.../lib/Tooling/Inclusions/HeaderIncludes.cpp | 22 ++++-
.../unittests/Tooling/HeaderIncludesTest.cpp | 81 +++++++++++++++++++
2 files changed, 102 insertions(+), 1 deletion(-)
diff --git a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
index 2b5a293b35841..2ad0c8b1ff135 100644
--- a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
+++ b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
@@ -74,6 +74,15 @@ void skipComments(Lexer &Lex, Token &Tok) {
return;
}
+bool checkAndConsumeModuleDecl(const SourceManager &SM, Lexer &Lex,
+ Token &Tok) {
+ bool Matched = Tok.is(tok::raw_identifier) &&
+ Tok.getRawIdentifier() == "module" &&
+ !Lex.LexFromRawLexer(Tok) && Tok.is(tok::semi) &&
+ !Lex.LexFromRawLexer(Tok);
+ return Matched;
+}
+
// Returns the offset after header guard directives and any comments
// before/after header guards (e.g. #ifndef/#define pair, #pragma once). If no
// header guard is present in the code, this will return the offset after
@@ -95,7 +104,17 @@ unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
return std::max(InitialOffset, Consume(SM, Lex, Tok));
});
};
- return std::max(
+
+ auto ModuleDecl = ConsumeHeaderGuardAndComment(
+ [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
+ if (checkAndConsumeModuleDecl(SM, Lex, Tok)) {
+ skipComments(Lex, Tok);
+ return SM.getFileOffset(Tok.getLocation());
+ }
+ return 0;
+ });
+
+ auto HeaderAndPPOffset = std::max(
// #ifndef/#define
ConsumeHeaderGuardAndComment(
[](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
@@ -115,6 +134,7 @@ unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
return SM.getFileOffset(Tok.getLocation());
return 0;
}));
+ return std::max(HeaderAndPPOffset, ModuleDecl);
}
// Check if a sequence of tokens is like
diff --git a/clang/unittests/Tooling/HeaderIncludesTest.cpp b/clang/unittests/Tooling/HeaderIncludesTest.cpp
index 929156a11d0d9..9308513225d9a 100644
--- a/clang/unittests/Tooling/HeaderIncludesTest.cpp
+++ b/clang/unittests/Tooling/HeaderIncludesTest.cpp
@@ -594,6 +594,87 @@ TEST_F(HeaderIncludesTest, CanDeleteAfterCode) {
EXPECT_EQ(Expected, remove(Code, "\"b.h\""));
}
+TEST_F(HeaderIncludesTest, InsertInGlobalModuleFragment) {
+ // Ensure header insertions go only in the global module fragment
+ std::string Code = R"cpp(// comments
+
+// more comments
+
+module;
+export module foo;
+
+int main() {
+ std::vector<int> ints {};
+})cpp";
+ std::string Expected = R"cpp(// comments
+
+// more comments
+
+module;
+#include <vector>
+export module foo;
+
+int main() {
+ std::vector<int> ints {};
+})cpp";
+
+ auto InsertedCode = insert(Code, "<vector>");
+ EXPECT_EQ(Expected, insert(Code, "<vector>"));
+}
+
+TEST_F(HeaderIncludesTest, InsertInGlobalModuleFragmentWithPP) {
+ // Ensure header insertions go only in the global module fragment
+ std::string Code = R"cpp(// comments
+
+// more comments
+
+// some more comments
+
+module;
+
+#ifndef MACRO_NAME
+#define MACRO_NAME
+#endif
+
+// comment
+
+#ifndef MACRO_NAME
+#define MACRO_NAME
+#endif
+
+// more comment
+
+int main() {
+ std::vector<int> ints {};
+})cpp";
+ std::string Expected = R"cpp(// comments
+
+// more comments
+
+// some more comments
+
+module;
+
+#include <vector>
+#ifndef MACRO_NAME
+#define MACRO_NAME
+#endif
+
+// comment
+
+#ifndef MACRO_NAME
+#define MACRO_NAME
+#endif
+
+// more comment
+
+int main() {
+ std::vector<int> ints {};
+})cpp";
+
+ EXPECT_EQ(Expected, insert(Code, "<vector>"));
+}
+
} // namespace
} // namespace tooling
} // namespace clang
>From 973ace64a6af7c8664c3f1b48660989e3b871b2c Mon Sep 17 00:00:00 2001
From: Mythreya Kuricheti <git at mythreya.dev>
Date: Sat, 2 Aug 2025 00:03:51 -0700
Subject: [PATCH 2/3] code review
---
.../unittests/Tooling/HeaderIncludesTest.cpp | 61 +++++++++++++++++++
1 file changed, 61 insertions(+)
diff --git a/clang/unittests/Tooling/HeaderIncludesTest.cpp b/clang/unittests/Tooling/HeaderIncludesTest.cpp
index 9308513225d9a..befe4a3dd5a8a 100644
--- a/clang/unittests/Tooling/HeaderIncludesTest.cpp
+++ b/clang/unittests/Tooling/HeaderIncludesTest.cpp
@@ -675,6 +675,67 @@ int main() {
EXPECT_EQ(Expected, insert(Code, "<vector>"));
}
+TEST_F(HeaderIncludesTest, InsertInGlobalModuleFragmentWithPPIncludes) {
+ // Ensure header insertions go only in the global module fragment
+ std::string Code = R"cpp(// comments
+
+// more comments
+
+// some more comments
+
+module;
+
+#include "header.h"
+
+#include <string>
+
+#ifndef MACRO_NAME
+#define MACRO_NAME
+#endif
+
+// comment
+
+#ifndef MACRO_NAME
+#define MACRO_NAME
+#endif
+
+// more comment
+
+int main() {
+ std::vector<int> ints {};
+})cpp";
+ std::string Expected = R"cpp(// comments
+
+// more comments
+
+// some more comments
+
+module;
+
+#include "header.h"
+
+#include <string>
+#include <vector>
+
+#ifndef MACRO_NAME
+#define MACRO_NAME
+#endif
+
+// comment
+
+#ifndef MACRO_NAME
+#define MACRO_NAME
+#endif
+
+// more comment
+
+int main() {
+ std::vector<int> ints {};
+})cpp";
+
+ EXPECT_EQ(Expected, insert(Code, "<vector>"));
+}
+
} // namespace
} // namespace tooling
} // namespace clang
>From e0449388177ee0340886248f2b393b6ebd7a926d Mon Sep 17 00:00:00 2001
From: Mythreya Kuricheti <git at mythreya.dev>
Date: Sat, 2 Aug 2025 22:12:37 -0700
Subject: [PATCH 3/3] code review
---
.../lib/Tooling/Inclusions/HeaderIncludes.cpp | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
index 2ad0c8b1ff135..a9fc09411dfbf 100644
--- a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
+++ b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
@@ -83,13 +83,14 @@ bool checkAndConsumeModuleDecl(const SourceManager &SM, Lexer &Lex,
return Matched;
}
-// Returns the offset after header guard directives and any comments
-// before/after header guards (e.g. #ifndef/#define pair, #pragma once). If no
-// header guard is present in the code, this will return the offset after
-// skipping all comments from the start of the code.
-unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
- StringRef Code,
- const IncludeStyle &Style) {
+// If file does not use modules, returns the offset after header guard
+// directives and any comments before/after header guards (e.g.
+// `#ifndef/#define` pair, `#pragma once`). If no header guard is present in the
+// code, this will return the offset after skipping all comments from the start
+// of the code. If modules are in use, the offset will be in the global module
+// fragment.
+unsigned getMinHeaderInsertionOffset(StringRef FileName, StringRef Code,
+ const IncludeStyle &Style) {
// \p Consume returns location after header guard or 0 if no header guard is
// found.
auto ConsumeHeaderGuardAndComment =
@@ -300,8 +301,7 @@ const llvm::Regex HeaderIncludes::IncludeRegex(IncludeRegexPattern);
HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code,
const IncludeStyle &Style)
: FileName(FileName), Code(Code), FirstIncludeOffset(-1),
- MinInsertOffset(
- getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style)),
+ MinInsertOffset(getMinHeaderInsertionOffset(FileName, Code, Style)),
MaxInsertOffset(MinInsertOffset +
getMaxHeaderInsertionOffset(
FileName, Code.drop_front(MinInsertOffset), Style)),
More information about the cfe-commits
mailing list