[clang] [clang-format] Add the C language instead of treating it like C++ (PR #128287)

Owen Pan via cfe-commits cfe-commits at lists.llvm.org
Fri Feb 21 21:45:21 PST 2025


https://github.com/owenca created https://github.com/llvm/llvm-project/pull/128287

Closes #128120

From 84cfaec1f830463d2332f14eca9cfbb86919d3d5 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano at gmail.com>
Date: Fri, 21 Feb 2025 21:41:36 -0800
Subject: [PATCH] [clang-format] Add the C language instead of treating it like
 C++

Closes #128120
---
 clang/docs/ClangFormatStyleOptions.rst   |  9 ++++++---
 clang/docs/ReleaseNotes.rst              |  7 ++++---
 clang/include/clang/Format/Format.h      | 14 ++++++++++----
 clang/lib/Format/Format.cpp              | 12 +++++++++++-
 clang/lib/Format/FormatToken.cpp         |  2 +-
 clang/lib/Format/TokenAnnotator.cpp      |  4 ++--
 clang/lib/Format/TokenAnnotator.h        |  2 +-
 clang/lib/Format/UnwrappedLineParser.cpp |  2 +-
 clang/unittests/Format/FormatTest.cpp    |  4 ++++
 9 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst
index ba50eb2c7e89b..d157c07c9cef8 100644
--- a/clang/docs/ClangFormatStyleOptions.rst
+++ b/clang/docs/ClangFormatStyleOptions.rst
@@ -4786,8 +4786,8 @@ the configuration (without a prefix: ``Auto``).
 
   .. note::
 
-   You can also specify the language (``Cpp`` or ``ObjC``) for ``.h`` files
-   by adding a ``// clang-format Language:`` line before the first
+   You can specify the language (``C``, ``Cpp``, or ``ObjC``) for ``.h``
+   files by adding a ``// clang-format Language:`` line before the first
    non-comment (and non-empty) line, e.g. ``// clang-format Language: Cpp``.
 
   Possible values:
@@ -4795,8 +4795,11 @@ the configuration (without a prefix: ``Auto``).
   * ``LK_None`` (in configuration: ``None``)
     Do not use.
 
+  * ``LK_C`` (in configuration: ``C``)
+    Should be used for C.
+
   * ``LK_Cpp`` (in configuration: ``Cpp``)
-    Should be used for C, C++.
+    Should be used for C++.
 
   * ``LK_CSharp`` (in configuration: ``CSharp``)
     Should be used for C#.
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index ec9cab2eb657f..699cbb17edca7 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -271,9 +271,10 @@ clang-format
 - Adds ``BreakBeforeTemplateCloser`` option.
 - Adds ``BinPackLongBracedList`` option to override bin packing options in
   long (20 item or more) braced list initializer lists.
-- Allow specifying the language (C++ or Objective-C) for a ``.h`` file by adding
-  a special comment (e.g. ``// clang-format Language: ObjC``) near the top of
-  the file.
+- Add the C language instead of treating it like C++.
+- Allow specifying the language (C, C++, or Objective-C) for a ``.h`` file by
+  adding a special comment (e.g. ``// clang-format Language: ObjC``) near the
+  top of the file.
 
 libclang
 --------
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index 2dc95c3c06d29..46fb1d52701b3 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -3318,7 +3318,9 @@ struct FormatStyle {
   enum LanguageKind : int8_t {
     /// Do not use.
     LK_None,
-    /// Should be used for C, C++.
+    /// Should be used for C.
+    LK_C,
+    /// Should be used for C++.
     LK_Cpp,
     /// Should be used for C#.
     LK_CSharp,
@@ -3343,7 +3345,9 @@ struct FormatStyle {
     /// https://sci-hub.st/10.1109/IEEESTD.2018.8299595
     LK_Verilog
   };
-  bool isCpp() const { return Language == LK_Cpp || Language == LK_ObjC; }
+  bool isCpp() const {
+    return Language == LK_Cpp || Language == LK_C || Language == LK_ObjC;
+  }
   bool isCSharp() const { return Language == LK_CSharp; }
   bool isJson() const { return Language == LK_Json; }
   bool isJavaScript() const { return Language == LK_JavaScript; }
@@ -3355,8 +3359,8 @@ struct FormatStyle {
 
   /// The language that this format style targets.
   /// \note
-  ///  You can also specify the language (``Cpp`` or ``ObjC``) for ``.h`` files
-  ///  by adding a ``// clang-format Language:`` line before the first
+  ///  You can specify the language (``C``, ``Cpp``, or ``ObjC``) for ``.h``
+  ///  files by adding a ``// clang-format Language:`` line before the first
   ///  non-comment (and non-empty) line, e.g. ``// clang-format Language: Cpp``.
   /// \endnote
   /// \version 3.5
@@ -5715,6 +5719,8 @@ FormatStyle::LanguageKind guessLanguage(StringRef FileName, StringRef Code);
 // Returns a string representation of ``Language``.
 inline StringRef getLanguageName(FormatStyle::LanguageKind Language) {
   switch (Language) {
+  case FormatStyle::LK_C:
+    return "C";
   case FormatStyle::LK_Cpp:
     return "C++";
   case FormatStyle::LK_CSharp:
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index b063843078251..001bc4e522792 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -401,6 +401,7 @@ template <> struct MappingTraits<FormatStyle::KeepEmptyLinesStyle> {
 
 template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
   static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) {
+    IO.enumCase(Value, "C", FormatStyle::LK_C);
     IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp);
     IO.enumCase(Value, "Java", FormatStyle::LK_Java);
     IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript);
@@ -3957,7 +3958,12 @@ LangOptions getFormattingLangOpts(const FormatStyle &Style) {
   LangOpts.Digraphs = LexingStd >= FormatStyle::LS_Cpp11;
 
   LangOpts.LineComment = 1;
-  LangOpts.CXXOperatorNames = Style.isCpp();
+
+  const auto Language = Style.Language;
+  LangOpts.C11 = Language == FormatStyle::LK_C;
+  LangOpts.CXXOperatorNames =
+      Language == FormatStyle::LK_Cpp || Language == FormatStyle::LK_ObjC;
+
   LangOpts.Bool = 1;
   LangOpts.ObjC = 1;
   LangOpts.MicrosoftExt = 1;    // To get kw___try, kw___finally.
@@ -3982,6 +3988,8 @@ const char *StyleOptionHelpDescription =
     "   --style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
 
 static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
+  if (FileName.ends_with(".c"))
+    return FormatStyle::LK_C;
   if (FileName.ends_with(".java"))
     return FormatStyle::LK_Java;
   if (FileName.ends_with_insensitive(".js") ||
@@ -4039,6 +4047,8 @@ static FormatStyle::LanguageKind getLanguageByComment(const Environment &Env) {
       continue;
 
     Text = Text.trim();
+    if (Text == "C")
+      return FormatStyle::LK_C;
     if (Text == "Cpp")
       return FormatStyle::LK_Cpp;
     if (Text == "ObjC")
diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp
index fb040a0043602..97ee90155d870 100644
--- a/clang/lib/Format/FormatToken.cpp
+++ b/clang/lib/Format/FormatToken.cpp
@@ -44,7 +44,7 @@ static SmallVector<StringRef> CppNonKeywordTypes = {
 bool FormatToken::isTypeName(const LangOptions &LangOpts) const {
   if (is(TT_TypeName) || Tok.isSimpleTypeSpecifier(LangOpts))
     return true;
-  const bool IsCpp = LangOpts.CXXOperatorNames;
+  const bool IsCpp = LangOpts.CXXOperatorNames || LangOpts.C11;
   return IsCpp && is(tok::identifier) &&
          std::binary_search(CppNonKeywordTypes.begin(),
                             CppNonKeywordTypes.end(), TokenText);
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index e68daa422b7c4..24e66333b861b 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -129,7 +129,7 @@ class AnnotatingParser {
       : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
         IsCpp(Style.isCpp()), LangOpts(getFormattingLangOpts(Style)),
         Keywords(Keywords), Scopes(Scopes), TemplateDeclarationDepth(0) {
-    assert(IsCpp == LangOpts.CXXOperatorNames);
+    assert(IsCpp == (LangOpts.CXXOperatorNames || LangOpts.C11));
     Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
     resetTokenMetadata();
   }
@@ -3821,7 +3821,7 @@ static bool isFunctionDeclarationName(const LangOptions &LangOpts,
   };
 
   const auto *Next = Current.Next;
-  const bool IsCpp = LangOpts.CXXOperatorNames;
+  const bool IsCpp = LangOpts.CXXOperatorNames || LangOpts.C11;
 
   // Find parentheses of parameter list.
   if (Current.is(tok::kw_operator)) {
diff --git a/clang/lib/Format/TokenAnnotator.h b/clang/lib/Format/TokenAnnotator.h
index 6aea310a56d69..7d065f748cbe5 100644
--- a/clang/lib/Format/TokenAnnotator.h
+++ b/clang/lib/Format/TokenAnnotator.h
@@ -225,7 +225,7 @@ class TokenAnnotator {
   TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
       : Style(Style), IsCpp(Style.isCpp()),
         LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords) {
-    assert(IsCpp == LangOpts.CXXOperatorNames);
+    assert(IsCpp == (LangOpts.CXXOperatorNames || LangOpts.C11));
   }
 
   /// Adapts the indent levels of comment lines to the indent of the
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 3a24d72d83e27..1ee06905aae43 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -168,7 +168,7 @@ UnwrappedLineParser::UnwrappedLineParser(
                        : IG_Inited),
       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
       Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {
-  assert(IsCpp == LangOpts.CXXOperatorNames);
+  assert(IsCpp == (LangOpts.CXXOperatorNames || LangOpts.C11));
 }
 
 void UnwrappedLineParser::reset() {
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 05febf12c17ba..fe0e47080a577 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -24971,6 +24971,7 @@ TEST_F(FormatTest, StructuredBindings) {
 }
 
 TEST_F(FormatTest, FileAndCode) {
+  EXPECT_EQ(FormatStyle::LK_C, guessLanguage("foo.c", ""));
   EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo.cc", ""));
   EXPECT_EQ(FormatStyle::LK_ObjC, guessLanguage("foo.m", ""));
   EXPECT_EQ(FormatStyle::LK_ObjC, guessLanguage("foo.mm", ""));
@@ -25137,6 +25138,9 @@ TEST_F(FormatTest, GuessLanguageWithChildLines) {
 }
 
 TEST_F(FormatTest, GetLanguageByComment) {
+  EXPECT_EQ(FormatStyle::LK_C,
+            guessLanguage("foo.h", "// clang-format Language: C\n"
+                                   "int i;"));
   EXPECT_EQ(FormatStyle::LK_Cpp,
             guessLanguage("foo.h", "// clang-format Language: Cpp\n"
                                    "int DoStuff(CGRect rect);"));



More information about the cfe-commits mailing list