[clang] [clang-format] Allow specifying the language for `.h` files (PR #128122)

Owen Pan via cfe-commits cfe-commits at lists.llvm.org
Fri Feb 21 19:55:59 PST 2025


https://github.com/owenca updated https://github.com/llvm/llvm-project/pull/128122

>From 507c54acac3e73826f63691c901ceba9c569869f Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano at gmail.com>
Date: Thu, 20 Feb 2025 20:02:44 -0800
Subject: [PATCH 1/2] [clang-format] Allow specifying the language for `.h`
 files

Closes #128119
---
 clang/docs/ClangFormatStyleOptions.rst |  8 ++++++-
 clang/docs/ReleaseNotes.rst            |  3 +++
 clang/include/clang/Format/Format.h    |  7 +++++-
 clang/lib/Format/Format.cpp            | 33 ++++++++++++++++++++++++++
 clang/unittests/Format/FormatTest.cpp  |  9 +++++++
 5 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst
index bf6dd9e13915f..2d4ead76cfef2 100644
--- a/clang/docs/ClangFormatStyleOptions.rst
+++ b/clang/docs/ClangFormatStyleOptions.rst
@@ -4782,7 +4782,13 @@ the configuration (without a prefix: ``Auto``).
 .. _Language:
 
 **Language** (``LanguageKind``) :versionbadge:`clang-format 3.5` :ref:`¶ <Language>`
-  Language, this format style is targeted at.
+  The language that this format style targets.
+
+  .. note::
+
+   You can also specify the language (``Cpp`` or ``ObjC``) for ``.h`` files
+   by adding a ``// clang-format Language:`` line before the first
+   non-comment and non-empty line, e.g. ``// clang-format Language: ObjC``.
 
   Possible values:
 
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index e1c61992512b5..0e65f72623f28 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -269,6 +269,9 @@ clang-format
 - Adds ``BreakBeforeTemplateCloser`` option.
 - Adds ``BinPackLongBracedList`` option to override bin packing options in
   long (20 item or more) braced list initializer lists.
+- Allow specifying the language (C++ or Objective-C) for a ``.h`` file by adding
+  a special comment (e.g. ``// clang-format Language: ObjC``) near the top of
+  the file.
 
 libclang
 --------
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index 16956b4e0fbd4..55709a0261b12 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -3353,7 +3353,12 @@ struct FormatStyle {
   }
   bool isTableGen() const { return Language == LK_TableGen; }
 
-  /// Language, this format style is targeted at.
+  /// The language that this format style targets.
+  /// \note
+  ///  You can also specify the language (``Cpp`` or ``ObjC``) for ``.h`` files
+  ///  by adding a ``// clang-format Language:`` line before the first
+  ///  non-comment and non-empty line, e.g. ``// clang-format Language: ObjC``.
+  /// \endnote
   /// \version 3.5
   LanguageKind Language;
 
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 0898b69528ebc..400f39ecc5483 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -4021,6 +4021,35 @@ static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
   return FormatStyle::LK_Cpp;
 }
 
+static FormatStyle::LanguageKind getLanguageByComment(const Environment &Env) {
+  const auto ID = Env.getFileID();
+  const auto &SourceMgr = Env.getSourceManager();
+
+  LangOptions LangOpts;
+  LangOpts.CPlusPlus = 1;
+  LangOpts.LineComment = 1;
+
+  Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts);
+  Lex.SetCommentRetentionState(true);
+
+  for (Token Tok; !Lex.LexFromRawLexer(Tok) && Tok.is(tok::comment);) {
+    auto Text = StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
+                          Tok.getLength());
+    if (!Text.consume_front("// clang-format Language:"))
+      continue;
+
+    Text = Text.trim();
+    // if (Text == "C")
+    //   return FormatStyle::LK_C;
+    if (Text == "Cpp")
+      return FormatStyle::LK_Cpp;
+    if (Text == "ObjC")
+      return FormatStyle::LK_ObjC;
+  }
+
+  return FormatStyle::LK_None;
+}
+
 FormatStyle::LanguageKind guessLanguage(StringRef FileName, StringRef Code) {
   const auto GuessedLanguage = getLanguageByFileName(FileName);
   if (GuessedLanguage == FormatStyle::LK_Cpp) {
@@ -4030,6 +4059,10 @@ FormatStyle::LanguageKind guessLanguage(StringRef FileName, StringRef Code) {
     if (!Code.empty() && (Extension.empty() || Extension == ".h")) {
       auto NonEmptyFileName = FileName.empty() ? "guess.h" : FileName;
       Environment Env(Code, NonEmptyFileName, /*Ranges=*/{});
+      if (const auto Language = getLanguageByComment(Env);
+          Language != FormatStyle::LK_None) {
+        return Language;
+      }
       ObjCHeaderStyleGuesser Guesser(Env, getLLVMStyle());
       Guesser.process();
       if (Guesser.isObjC())
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 132264486100d..05febf12c17ba 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -25136,6 +25136,15 @@ TEST_F(FormatTest, GuessLanguageWithChildLines) {
       guessLanguage("foo.h", "#define FOO ({ foo(); ({ NSString *s; }) })"));
 }
 
+TEST_F(FormatTest, GetLanguageByComment) {
+  EXPECT_EQ(FormatStyle::LK_Cpp,
+            guessLanguage("foo.h", "// clang-format Language: Cpp\n"
+                                   "int DoStuff(CGRect rect);"));
+  EXPECT_EQ(FormatStyle::LK_ObjC,
+            guessLanguage("foo.h", "// clang-format Language: ObjC\n"
+                                   "int i;"));
+}
+
 TEST_F(FormatTest, TypenameMacros) {
   std::vector<std::string> TypenameMacros = {"STACK_OF", "LIST", "TAILQ_ENTRY"};
 

>From 97e9ea0433e40b6f65c98918e0c453421fbc24bf Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano at gmail.com>
Date: Fri, 21 Feb 2025 19:55:50 -0800
Subject: [PATCH 2/2] Minor cleanup

---
 clang/docs/ClangFormatStyleOptions.rst | 2 +-
 clang/include/clang/Format/Format.h    | 2 +-
 clang/lib/Format/Format.cpp            | 2 --
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst
index 2d4ead76cfef2..ba50eb2c7e89b 100644
--- a/clang/docs/ClangFormatStyleOptions.rst
+++ b/clang/docs/ClangFormatStyleOptions.rst
@@ -4788,7 +4788,7 @@ the configuration (without a prefix: ``Auto``).
 
    You can also specify the language (``Cpp`` or ``ObjC``) for ``.h`` files
    by adding a ``// clang-format Language:`` line before the first
-   non-comment and non-empty line, e.g. ``// clang-format Language: ObjC``.
+   non-comment (and non-empty) line, e.g. ``// clang-format Language: Cpp``.
 
   Possible values:
 
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index 55709a0261b12..2dc95c3c06d29 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -3357,7 +3357,7 @@ struct FormatStyle {
   /// \note
   ///  You can also specify the language (``Cpp`` or ``ObjC``) for ``.h`` files
   ///  by adding a ``// clang-format Language:`` line before the first
-  ///  non-comment and non-empty line, e.g. ``// clang-format Language: ObjC``.
+  ///  non-comment (and non-empty) line, e.g. ``// clang-format Language: Cpp``.
   /// \endnote
   /// \version 3.5
   LanguageKind Language;
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 400f39ecc5483..b063843078251 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -4039,8 +4039,6 @@ static FormatStyle::LanguageKind getLanguageByComment(const Environment &Env) {
       continue;
 
     Text = Text.trim();
-    // if (Text == "C")
-    //   return FormatStyle::LK_C;
     if (Text == "Cpp")
       return FormatStyle::LK_Cpp;
     if (Text == "ObjC")



More information about the cfe-commits mailing list