[clang] c76a9af - [clang][Tooling] Choose header insertion location in global module fragment (#151624)

via cfe-commits cfe-commits at lists.llvm.org
Sun Aug 3 00:20:29 PDT 2025


Author: Mythreya Kuricheti
Date: 2025-08-03T03:20:25-04:00
New Revision: c76a9afe248ac88a19a1c0a2ed456c86c7c55ad3

URL: https://github.com/llvm/llvm-project/commit/c76a9afe248ac88a19a1c0a2ed456c86c7c55ad3
DIFF: https://github.com/llvm/llvm-project/commit/c76a9afe248ac88a19a1c0a2ed456c86c7c55ad3.diff

LOG: [clang][Tooling] Choose header insertion location in global module fragment (#151624)

Ensures that headers are inserted after `module;` declaration by
updating minimum offset.

Added: 
    

Modified: 
    clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
    clang/unittests/Tooling/HeaderIncludesTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
index 2b5a293b35841..e11319e99ba6a 100644
--- a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
+++ b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
@@ -74,13 +74,24 @@ void skipComments(Lexer &Lex, Token &Tok) {
       return;
 }
 
-// Returns the offset after header guard directives and any comments
-// before/after header guards (e.g. #ifndef/#define pair, #pragma once). If no
-// header guard is present in the code, this will return the offset after
-// skipping all comments from the start of the code.
-unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
-                                               StringRef Code,
-                                               const IncludeStyle &Style) {
+bool checkAndConsumeModuleDecl(const SourceManager &SM, Lexer &Lex,
+                               Token &Tok) {
+  bool Matched = Tok.is(tok::raw_identifier) &&
+                 Tok.getRawIdentifier() == "module" &&
+                 !Lex.LexFromRawLexer(Tok) && Tok.is(tok::semi) &&
+                 !Lex.LexFromRawLexer(Tok);
+  return Matched;
+}
+
+// Determines the minimum offset into the file where we want to insert header
+// includes. This will be put (when available):
+// - after `#pragma once`
+// - after header guards (`#ifdef` and `#define`)
+// - after opening global module (`module;`)
+// - after any comments at the start of the file or immediately following one of
+//   the above constructs
+unsigned getMinHeaderInsertionOffset(StringRef FileName, StringRef Code,
+                                     const IncludeStyle &Style) {
   // \p Consume returns location after header guard or 0 if no header guard is
   // found.
   auto ConsumeHeaderGuardAndComment =
@@ -95,7 +106,17 @@ unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
               return std::max(InitialOffset, Consume(SM, Lex, Tok));
             });
       };
-  return std::max(
+
+  auto ModuleDecl = ConsumeHeaderGuardAndComment(
+      [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
+        if (checkAndConsumeModuleDecl(SM, Lex, Tok)) {
+          skipComments(Lex, Tok);
+          return SM.getFileOffset(Tok.getLocation());
+        }
+        return 0;
+      });
+
+  auto HeaderAndPPOffset = std::max(
       // #ifndef/#define
       ConsumeHeaderGuardAndComment(
           [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
@@ -115,6 +136,7 @@ unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
               return SM.getFileOffset(Tok.getLocation());
             return 0;
           }));
+  return std::max(HeaderAndPPOffset, ModuleDecl);
 }
 
 // Check if a sequence of tokens is like
@@ -280,8 +302,7 @@ const llvm::Regex HeaderIncludes::IncludeRegex(IncludeRegexPattern);
 HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code,
                                const IncludeStyle &Style)
     : FileName(FileName), Code(Code), FirstIncludeOffset(-1),
-      MinInsertOffset(
-          getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style)),
+      MinInsertOffset(getMinHeaderInsertionOffset(FileName, Code, Style)),
       MaxInsertOffset(MinInsertOffset +
                       getMaxHeaderInsertionOffset(
                           FileName, Code.drop_front(MinInsertOffset), Style)),

diff  --git a/clang/unittests/Tooling/HeaderIncludesTest.cpp b/clang/unittests/Tooling/HeaderIncludesTest.cpp
index 929156a11d0d9..befe4a3dd5a8a 100644
--- a/clang/unittests/Tooling/HeaderIncludesTest.cpp
+++ b/clang/unittests/Tooling/HeaderIncludesTest.cpp
@@ -594,6 +594,148 @@ TEST_F(HeaderIncludesTest, CanDeleteAfterCode) {
   EXPECT_EQ(Expected, remove(Code, "\"b.h\""));
 }
 
+TEST_F(HeaderIncludesTest, InsertInGlobalModuleFragment) {
+  // Ensure header insertions go only in the global module fragment
+  std::string Code = R"cpp(// comments
+
+// more comments
+
+module;
+export module foo;
+
+int main() {
+    std::vector<int> ints {};
+})cpp";
+  std::string Expected = R"cpp(// comments
+
+// more comments
+
+module;
+#include <vector>
+export module foo;
+
+int main() {
+    std::vector<int> ints {};
+})cpp";
+
+  auto InsertedCode = insert(Code, "<vector>");
+  EXPECT_EQ(Expected, insert(Code, "<vector>"));
+}
+
+TEST_F(HeaderIncludesTest, InsertInGlobalModuleFragmentWithPP) {
+  // Ensure header insertions go only in the global module fragment
+  std::string Code = R"cpp(// comments
+
+// more comments
+
+// some more comments
+
+module;
+
+#ifndef MACRO_NAME
+#define MACRO_NAME
+#endif
+
+// comment
+
+#ifndef MACRO_NAME
+#define MACRO_NAME
+#endif
+
+// more comment
+
+int main() {
+    std::vector<int> ints {};
+})cpp";
+  std::string Expected = R"cpp(// comments
+
+// more comments
+
+// some more comments
+
+module;
+
+#include <vector>
+#ifndef MACRO_NAME
+#define MACRO_NAME
+#endif
+
+// comment
+
+#ifndef MACRO_NAME
+#define MACRO_NAME
+#endif
+
+// more comment
+
+int main() {
+    std::vector<int> ints {};
+})cpp";
+
+  EXPECT_EQ(Expected, insert(Code, "<vector>"));
+}
+
+TEST_F(HeaderIncludesTest, InsertInGlobalModuleFragmentWithPPIncludes) {
+  // Ensure header insertions go only in the global module fragment
+  std::string Code = R"cpp(// comments
+
+// more comments
+
+// some more comments
+
+module;
+
+#include "header.h"
+
+#include <string>
+
+#ifndef MACRO_NAME
+#define MACRO_NAME
+#endif
+
+// comment
+
+#ifndef MACRO_NAME
+#define MACRO_NAME
+#endif
+
+// more comment
+
+int main() {
+    std::vector<int> ints {};
+})cpp";
+  std::string Expected = R"cpp(// comments
+
+// more comments
+
+// some more comments
+
+module;
+
+#include "header.h"
+
+#include <string>
+#include <vector>
+
+#ifndef MACRO_NAME
+#define MACRO_NAME
+#endif
+
+// comment
+
+#ifndef MACRO_NAME
+#define MACRO_NAME
+#endif
+
+// more comment
+
+int main() {
+    std::vector<int> ints {};
+})cpp";
+
+  EXPECT_EQ(Expected, insert(Code, "<vector>"));
+}
+
 } // namespace
 } // namespace tooling
 } // namespace clang


        


More information about the cfe-commits mailing list