[clang] Insert headers in global module fragment (PR #151624)

Mythreya Kuricheti via cfe-commits cfe-commits at lists.llvm.org
Thu Jul 31 19:32:06 PDT 2025


https://github.com/MythreyaK updated https://github.com/llvm/llvm-project/pull/151624

>From 51db5932277a81581ba5076a7f79eb0b124c2b92 Mon Sep 17 00:00:00 2001
From: Mythreya Kuricheti <git at mythreya.dev>
Date: Thu, 31 Jul 2025 19:14:22 -0700
Subject: [PATCH] [libtooling] Insert headers in global module fragment

---
 .../lib/Tooling/Inclusions/HeaderIncludes.cpp | 119 ++++++++++++------
 .../unittests/Tooling/HeaderIncludesTest.cpp  |  59 +++++++++
 2 files changed, 143 insertions(+), 35 deletions(-)

diff --git a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
index 2b5a293b35841..e4e752a947c22 100644
--- a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
+++ b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
@@ -22,6 +22,8 @@ LangOptions createLangOpts() {
   LangOpts.CPlusPlus = 1;
   LangOpts.CPlusPlus11 = 1;
   LangOpts.CPlusPlus14 = 1;
+  LangOpts.CPlusPlus20 = 1;
+  LangOpts.CPlusPlusModules = 1;
   LangOpts.LineComment = 1;
   LangOpts.CXXOperatorNames = 1;
   LangOpts.Bool = 1;
@@ -68,53 +70,100 @@ bool checkAndConsumeDirectiveWithName(
   return Matched;
 }
 
+bool checkAndConsumeDirective(Lexer &Lex, StringRef Name, Token &Tok) {
+  return Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) && // '#', not at EOF
+         Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == Name &&
+         !Lex.LexFromRawLexer(Tok); // step past the directive name itself
+}
+
+// Check and consume a "module;" declaration starting at Tok. Returns true only
+// if both tokens matched and neither LexFromRawLexer call reported the end.
+bool checkAndConsumeModuleDecl(Lexer &Lex, Token &Tok) {
+  return Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == "module" &&
+         !Lex.LexFromRawLexer(Tok) && Tok.is(tok::semi) &&
+         !Lex.LexFromRawLexer(Tok);
+}
+
 void skipComments(Lexer &Lex, Token &Tok) {
   while (Tok.is(tok::comment))
     if (Lex.LexFromRawLexer(Tok))
       return;
 }
 
+// Skips leading comments, then "#pragma once" or "module;", then #ifndef groups.
+unsigned skipToGlobalModuleFragmentAfterPP(StringRef FileName,
+                                               StringRef Code,
+                                               const IncludeStyle &Style) {
+  return getOffsetAfterTokenSequence(FileName, Code, Style, 
+    [](const SourceManager& SM, Lexer& Lex, Token& Tok) -> unsigned {
+    // Skip every comment at the start of the file.
+    skipComments(Lex, Tok);
+    // A file using "#pragma once" is a header and must not have "module;", so
+    // "module;" is only looked for when no "#pragma once" directive matched.
+    if (!checkAndConsumeDirectiveWithName(Lex, "pragma", Tok,
+                                          StringRef("once"))) {
+      checkAndConsumeModuleDecl(Lex, Tok);
+    }
+    // Per "#ifndef": skip comments, try "#define" then "#endif", skip comments.
+    while(checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
+      skipComments(Lex, Tok);
+      if (checkAndConsumeDirectiveWithName(Lex, "define", Tok)) {
+        skipComments(Lex, Tok);
+        checkAndConsumeDirective(Lex, "endif", Tok);
+      }
+      skipComments(Lex, Tok);
+    }
+    // Tok is the first token not consumed above; report its file offset.
+    return SM.getFileOffset(Tok.getLocation());
+  });
+}
+
 // Returns the offset after header guard directives and any comments
 // before/after header guards (e.g. #ifndef/#define pair, #pragma once). If no
 // header guard is present in the code, this will return the offset after
-// skipping all comments from the start of the code.
-unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
+// skipping all comments from the start of the code. If "module;" follows those
+// comments, the offset is past it, i.e. inside the global module fragment.
+unsigned getMinHeaderInsertOffset(StringRef FileName,
                                                StringRef Code,
                                                const IncludeStyle &Style) {
   // \p Consume returns location after header guard or 0 if no header guard is
   // found.
-  auto ConsumeHeaderGuardAndComment =
-      [&](std::function<unsigned(const SourceManager &SM, Lexer &Lex,
-                                 Token Tok)>
-              Consume) {
-        return getOffsetAfterTokenSequence(
-            FileName, Code, Style,
-            [&Consume](const SourceManager &SM, Lexer &Lex, Token Tok) {
-              skipComments(Lex, Tok);
-              unsigned InitialOffset = SM.getFileOffset(Tok.getLocation());
-              return std::max(InitialOffset, Consume(SM, Lex, Tok));
-            });
-      };
-  return std::max(
-      // #ifndef/#define
-      ConsumeHeaderGuardAndComment(
-          [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
-            if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
-              skipComments(Lex, Tok);
-              if (checkAndConsumeDirectiveWithName(Lex, "define", Tok) &&
-                  Tok.isAtStartOfLine())
-                return SM.getFileOffset(Tok.getLocation());
-            }
-            return 0;
-          }),
-      // #pragma once
-      ConsumeHeaderGuardAndComment(
-          [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
-            if (checkAndConsumeDirectiveWithName(Lex, "pragma", Tok,
-                                                 StringRef("once")))
-              return SM.getFileOffset(Tok.getLocation());
-            return 0;
-          }));
+  // auto ConsumeHeaderGuardAndComment =
+  //     [&](std::function<unsigned(const SourceManager &SM, Lexer &Lex,
+  //                                Token Tok)>
+  //             Consume) {
+  //       return getOffsetAfterTokenSequence(
+  //           FileName, Code, Style,
+  //           [&Consume](const SourceManager &SM, Lexer &Lex, Token Tok) {
+  //             skipComments(Lex, Tok);
+  //             unsigned InitialOffset = SM.getFileOffset(Tok.getLocation());
+  //             return std::max(InitialOffset, Consume(SM, Lex, Tok));
+  //           });
+  //     };
+
+  // auto PreprocessorOffset = std::max(
+  //     // #ifndef/#define
+  //     ConsumeHeaderGuardAndComment(
+  //         [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
+  //           if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
+  //             skipComments(Lex, Tok);
+  //             if (checkAndConsumeDirectiveWithName(Lex, "define", Tok) &&
+  //                 Tok.isAtStartOfLine())
+  //               return SM.getFileOffset(Tok.getLocation());
+  //           }
+  //           return 0;
+  //         }),
+  //     // #pragma once
+  //     ConsumeHeaderGuardAndComment(
+  //         [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
+  //           if (checkAndConsumeDirectiveWithName(Lex, "pragma", Tok,
+  //                                                StringRef("once")))
+  //             return SM.getFileOffset(Tok.getLocation());
+  //           return 0;
+  //         }));
+  // return std::max(ModuleDecl, PreprocessorOffset);
+  // NOTE(review): drop the commented-out old implementation above before merge.
+  return skipToGlobalModuleFragmentAfterPP(FileName, Code, Style);
 }
 
 // Check if a sequence of tokens is like
@@ -281,7 +330,7 @@ HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code,
                                const IncludeStyle &Style)
     : FileName(FileName), Code(Code), FirstIncludeOffset(-1),
       MinInsertOffset(
-          getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style)),
+          getMinHeaderInsertOffset(FileName, Code, Style)),
       MaxInsertOffset(MinInsertOffset +
                       getMaxHeaderInsertionOffset(
                           FileName, Code.drop_front(MinInsertOffset), Style)),
diff --git a/clang/unittests/Tooling/HeaderIncludesTest.cpp b/clang/unittests/Tooling/HeaderIncludesTest.cpp
index 929156a11d0d9..c0f7f0f958898 100644
--- a/clang/unittests/Tooling/HeaderIncludesTest.cpp
+++ b/clang/unittests/Tooling/HeaderIncludesTest.cpp
@@ -594,6 +594,65 @@ TEST_F(HeaderIncludesTest, CanDeleteAfterCode) {
   EXPECT_EQ(Expected, remove(Code, "\"b.h\""));
 }
 
+TEST_F(HeaderIncludesTest, InsertInGlobalModuleFragment) {
+  // The include should land after "module;" and before "export module foo;".
+  std::string Code = R"cpp(// comments
+
+// more comments
+
+module;
+export module foo;
+
+int main() {
+    std::vector<int> ints {};
+})cpp";
+  std::string Expected = R"cpp(module;
+#include <vector>
+export module foo;
+
+int main() {
+    std::vector<int> ints {};
+})cpp";
+  // NOTE(review): Expected is unused; the result is printed, not asserted.
+  auto InsertedCode = insert(Code, "<vector>");
+  fprintf(stderr, "[[\n%s\n]]\n", InsertedCode.c_str());
+}
+
+TEST_F(HeaderIncludesTest, InsertInGlobalModuleFragmentWithPP) {
+  // No "module;" here: comments and #ifndef/#define/#endif groups come first.
+  std::string Code = R"cpp(// comments
+
+// more comments
+
+// some more comments
+
+#ifndef MACRO_NAME
+#define MACRO_NAME
+#endif
+
+// comment
+
+#ifndef MACRO_NAME
+#define MACRO_NAME
+#endif
+
+// more comment
+
+int main() {
+    std::vector<int> ints {};
+})cpp";
+  std::string Expected = R"cpp(module;
+#include <vector>
+export module foo;
+
+int main() {
+    std::vector<int> ints {};
+})cpp";
+  // NOTE(review): Expected matches the previous test, not this Code; unused.
+  auto InsertedCode = insert(Code, "<vector>");
+  fprintf(stderr, "[[\n%s\n]]\n", InsertedCode.c_str());
+}
+
 } // namespace
 } // namespace tooling
 } // namespace clang



More information about the cfe-commits mailing list