[llvm-branch-commits] [clang] [clang-format] Fix a bug that changes keyword `or` to an identifier (PR #128996)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Feb 26 20:38:17 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
@llvm/pr-subscribers-clang-format
Author: Owen Pan (owenca)
<details>
<summary>Changes</summary>
Backports ffc61dc393e4 0968df9c3a55 2d585ccecc45
Fixes #<!-- -->105482
---
Full diff: https://github.com/llvm/llvm-project/pull/128996.diff
11 Files Affected:
- (modified) clang/docs/ClangFormatStyleOptions.rst (+11-2)
- (modified) clang/docs/ReleaseNotes.rst (+4)
- (modified) clang/include/clang/Format/Format.h (+14-3)
- (modified) clang/lib/Format/Format.cpp (+42-1)
- (modified) clang/lib/Format/FormatToken.cpp (+5-5)
- (modified) clang/lib/Format/FormatToken.h (-23)
- (modified) clang/lib/Format/TokenAnnotator.cpp (+2-2)
- (modified) clang/lib/Format/TokenAnnotator.h (+1-1)
- (modified) clang/lib/Format/UnwrappedLineParser.cpp (+1-7)
- (modified) clang/unittests/Format/FormatTest.cpp (+17-2)
- (modified) clang/unittests/Format/TokenAnnotatorTest.cpp (+9-2)
``````````diff
diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst
index bbb912eb10e94..4b4c412a13323 100644
--- a/clang/docs/ClangFormatStyleOptions.rst
+++ b/clang/docs/ClangFormatStyleOptions.rst
@@ -4735,15 +4735,24 @@ the configuration (without a prefix: ``Auto``).
.. _Language:
**Language** (``LanguageKind``) :versionbadge:`clang-format 3.5` :ref:`¶ <Language>`
- Language, this format style is targeted at.
+ The language that this format style targets.
+
+ .. note::
+
+ You can specify the language (``C``, ``Cpp``, or ``ObjC``) for ``.h``
+ files by adding a ``// clang-format Language:`` line before the first
+ non-comment (and non-empty) line, e.g. ``// clang-format Language: Cpp``.
Possible values:
* ``LK_None`` (in configuration: ``None``)
Do not use.
+ * ``LK_C`` (in configuration: ``C``)
+ Should be used for C.
+
* ``LK_Cpp`` (in configuration: ``Cpp``)
- Should be used for C, C++.
+ Should be used for C++.
* ``LK_CSharp`` (in configuration: ``CSharp``)
Should be used for C#.
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 153afdb3d59e3..57a567509a068 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1358,6 +1358,10 @@ clang-format
- Adds ``WrapNamespaceBodyWithEmptyLines`` option.
- Adds the ``IndentExportBlock`` option.
- Adds ``PenaltyBreakBeforeMemberAccess`` option.
+- Add the C language instead of treating it like C++.
+- Allow specifying the language (C, C++, or Objective-C) for a ``.h`` file by
+ adding a special comment (e.g. ``// clang-format Language: ObjC``) near the
+ top of the file.
libclang
--------
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index 6f432d1d50315..abab543518222 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -3275,7 +3275,9 @@ struct FormatStyle {
enum LanguageKind : int8_t {
/// Do not use.
LK_None,
- /// Should be used for C, C++.
+ /// Should be used for C.
+ LK_C,
+ /// Should be used for C++.
LK_Cpp,
/// Should be used for C#.
LK_CSharp,
@@ -3300,7 +3302,9 @@ struct FormatStyle {
/// https://sci-hub.st/10.1109/IEEESTD.2018.8299595
LK_Verilog
};
- bool isCpp() const { return Language == LK_Cpp || Language == LK_ObjC; }
+ bool isCpp() const {
+ return Language == LK_Cpp || Language == LK_C || Language == LK_ObjC;
+ }
bool isCSharp() const { return Language == LK_CSharp; }
bool isJson() const { return Language == LK_Json; }
bool isJavaScript() const { return Language == LK_JavaScript; }
@@ -3310,7 +3314,12 @@ struct FormatStyle {
}
bool isTableGen() const { return Language == LK_TableGen; }
- /// Language, this format style is targeted at.
+ /// The language that this format style targets.
+ /// \note
+ /// You can specify the language (``C``, ``Cpp``, or ``ObjC``) for ``.h``
+ /// files by adding a ``// clang-format Language:`` line before the first
+ /// non-comment (and non-empty) line, e.g. ``// clang-format Language: Cpp``.
+ /// \endnote
/// \version 3.5
LanguageKind Language;
@@ -5665,6 +5674,8 @@ FormatStyle::LanguageKind guessLanguage(StringRef FileName, StringRef Code);
// Returns a string representation of ``Language``.
inline StringRef getLanguageName(FormatStyle::LanguageKind Language) {
switch (Language) {
+ case FormatStyle::LK_C:
+ return "C";
case FormatStyle::LK_Cpp:
return "C++";
case FormatStyle::LK_CSharp:
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index f02bf95cfeed7..0bb8545884442 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -401,6 +401,7 @@ template <> struct MappingTraits<FormatStyle::KeepEmptyLinesStyle> {
template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> {
static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) {
+ IO.enumCase(Value, "C", FormatStyle::LK_C);
IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp);
IO.enumCase(Value, "Java", FormatStyle::LK_Java);
IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript);
@@ -3952,7 +3953,12 @@ LangOptions getFormattingLangOpts(const FormatStyle &Style) {
LangOpts.Digraphs = LexingStd >= FormatStyle::LS_Cpp11;
LangOpts.LineComment = 1;
- LangOpts.CXXOperatorNames = Style.isCpp();
+
+ const auto Language = Style.Language;
+ LangOpts.C17 = Language == FormatStyle::LK_C;
+ LangOpts.CXXOperatorNames =
+ Language == FormatStyle::LK_Cpp || Language == FormatStyle::LK_ObjC;
+
LangOpts.Bool = 1;
LangOpts.ObjC = 1;
LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally.
@@ -3977,6 +3983,8 @@ const char *StyleOptionHelpDescription =
" --style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
+ if (FileName.ends_with(".c"))
+ return FormatStyle::LK_C;
if (FileName.ends_with(".java"))
return FormatStyle::LK_Java;
if (FileName.ends_with_insensitive(".js") ||
@@ -4016,6 +4024,35 @@ static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) {
return FormatStyle::LK_Cpp;
}
+static FormatStyle::LanguageKind getLanguageByComment(const Environment &Env) {
+ const auto ID = Env.getFileID();
+ const auto &SourceMgr = Env.getSourceManager();
+
+ LangOptions LangOpts;
+ LangOpts.CPlusPlus = 1;
+ LangOpts.LineComment = 1;
+
+ Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts);
+ Lex.SetCommentRetentionState(true);
+
+ for (Token Tok; !Lex.LexFromRawLexer(Tok) && Tok.is(tok::comment);) {
+ auto Text = StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
+ Tok.getLength());
+ if (!Text.consume_front("// clang-format Language:"))
+ continue;
+
+ Text = Text.trim();
+ if (Text == "C")
+ return FormatStyle::LK_C;
+ if (Text == "Cpp")
+ return FormatStyle::LK_Cpp;
+ if (Text == "ObjC")
+ return FormatStyle::LK_ObjC;
+ }
+
+ return FormatStyle::LK_None;
+}
+
FormatStyle::LanguageKind guessLanguage(StringRef FileName, StringRef Code) {
const auto GuessedLanguage = getLanguageByFileName(FileName);
if (GuessedLanguage == FormatStyle::LK_Cpp) {
@@ -4025,6 +4062,10 @@ FormatStyle::LanguageKind guessLanguage(StringRef FileName, StringRef Code) {
if (!Code.empty() && (Extension.empty() || Extension == ".h")) {
auto NonEmptyFileName = FileName.empty() ? "guess.h" : FileName;
Environment Env(Code, NonEmptyFileName, /*Ranges=*/{});
+ if (const auto Language = getLanguageByComment(Env);
+ Language != FormatStyle::LK_None) {
+ return Language;
+ }
ObjCHeaderStyleGuesser Guesser(Env, getLLVMStyle());
Guesser.process();
if (Guesser.isObjC())
diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp
index 963e8f87793fa..60e428123d26d 100644
--- a/clang/lib/Format/FormatToken.cpp
+++ b/clang/lib/Format/FormatToken.cpp
@@ -42,11 +42,11 @@ static SmallVector<StringRef> CppNonKeywordTypes = {
};
bool FormatToken::isTypeName(const LangOptions &LangOpts) const {
- const bool IsCpp = LangOpts.CXXOperatorNames;
- return is(TT_TypeName) || Tok.isSimpleTypeSpecifier(LangOpts) ||
- (IsCpp && is(tok::identifier) &&
- std::binary_search(CppNonKeywordTypes.begin(),
- CppNonKeywordTypes.end(), TokenText));
+ if (is(TT_TypeName) || Tok.isSimpleTypeSpecifier(LangOpts))
+ return true;
+ return (LangOpts.CXXOperatorNames || LangOpts.C17) && is(tok::identifier) &&
+ std::binary_search(CppNonKeywordTypes.begin(),
+ CppNonKeywordTypes.end(), TokenText);
}
bool FormatToken::isTypeOrIdentifier(const LangOptions &LangOpts) const {
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index 29aba281ae103..02429970599c0 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -743,29 +743,6 @@ struct FormatToken {
return isOneOf(tok::star, tok::amp, tok::ampamp);
}
- bool isCppAlternativeOperatorKeyword() const {
- assert(!TokenText.empty());
- if (!isalpha(TokenText[0]))
- return false;
-
- switch (Tok.getKind()) {
- case tok::ampamp:
- case tok::ampequal:
- case tok::amp:
- case tok::pipe:
- case tok::tilde:
- case tok::exclaim:
- case tok::exclaimequal:
- case tok::pipepipe:
- case tok::pipeequal:
- case tok::caret:
- case tok::caretequal:
- return true;
- default:
- return false;
- }
- }
-
bool isUnaryOperator() const {
switch (Tok.getKind()) {
case tok::plus:
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index ac5b25d52ce84..976c4d888e1fd 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -129,7 +129,7 @@ class AnnotatingParser {
: Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
IsCpp(Style.isCpp()), LangOpts(getFormattingLangOpts(Style)),
Keywords(Keywords), Scopes(Scopes), TemplateDeclarationDepth(0) {
- assert(IsCpp == LangOpts.CXXOperatorNames);
+ assert(IsCpp == (LangOpts.CXXOperatorNames || LangOpts.C17));
Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
resetTokenMetadata();
}
@@ -3820,7 +3820,7 @@ static bool isFunctionDeclarationName(const LangOptions &LangOpts,
};
const auto *Next = Current.Next;
- const bool IsCpp = LangOpts.CXXOperatorNames;
+ const bool IsCpp = LangOpts.CXXOperatorNames || LangOpts.C17;
// Find parentheses of parameter list.
if (Current.is(tok::kw_operator)) {
diff --git a/clang/lib/Format/TokenAnnotator.h b/clang/lib/Format/TokenAnnotator.h
index 6aea310a56d69..c0c13941ef4f7 100644
--- a/clang/lib/Format/TokenAnnotator.h
+++ b/clang/lib/Format/TokenAnnotator.h
@@ -225,7 +225,7 @@ class TokenAnnotator {
TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
: Style(Style), IsCpp(Style.isCpp()),
LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords) {
- assert(IsCpp == LangOpts.CXXOperatorNames);
+ assert(IsCpp == (LangOpts.CXXOperatorNames || LangOpts.C17));
}
/// Adapts the indent levels of comment lines to the indent of the
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 1411197e32554..9b4257fdd8c8f 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -168,7 +168,7 @@ UnwrappedLineParser::UnwrappedLineParser(
: IG_Inited),
IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {
- assert(IsCpp == LangOpts.CXXOperatorNames);
+ assert(IsCpp == (LangOpts.CXXOperatorNames || LangOpts.C17));
}
void UnwrappedLineParser::reset() {
@@ -1712,12 +1712,6 @@ void UnwrappedLineParser::parseStructuralElement(
OpeningBrace && OpeningBrace->isOneOf(TT_RequiresExpressionLBrace,
TT_CompoundRequirementLBrace);
!eof();) {
- if (IsCpp && FormatTok->isCppAlternativeOperatorKeyword()) {
- if (auto *Next = Tokens->peekNextToken(/*SkipComment=*/true);
- Next && Next->isBinaryOperator()) {
- FormatTok->Tok.setKind(tok::identifier);
- }
- }
const FormatToken *Previous = FormatTok->Previous;
switch (FormatTok->Tok.getKind()) {
case tok::at:
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 3b7856d6ee150..d1e96e0fa544a 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -17784,9 +17784,11 @@ TEST_F(FormatTest, ConfigurableSpaceBeforeAssignmentOperators) {
verifyFormat("int a = 5;");
verifyFormat("a += 42;");
verifyFormat("a or_eq 8;");
- verifyFormat("xor = foo;");
- FormatStyle Spaces = getLLVMStyle();
+ auto Spaces = getLLVMStyle(FormatStyle::LK_C);
+ verifyFormat("xor = foo;", Spaces);
+
+ Spaces.Language = FormatStyle::LK_Cpp;
Spaces.SpaceBeforeAssignmentOperators = false;
verifyFormat("int a= 5;", Spaces);
verifyFormat("a+= 42;", Spaces);
@@ -24683,6 +24685,7 @@ TEST_F(FormatTest, StructuredBindings) {
}
TEST_F(FormatTest, FileAndCode) {
+ EXPECT_EQ(FormatStyle::LK_C, guessLanguage("foo.c", ""));
EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo.cc", ""));
EXPECT_EQ(FormatStyle::LK_ObjC, guessLanguage("foo.m", ""));
EXPECT_EQ(FormatStyle::LK_ObjC, guessLanguage("foo.mm", ""));
@@ -24848,6 +24851,18 @@ TEST_F(FormatTest, GuessLanguageWithChildLines) {
guessLanguage("foo.h", "#define FOO ({ foo(); ({ NSString *s; }) })"));
}
+TEST_F(FormatTest, GetLanguageByComment) {
+ EXPECT_EQ(FormatStyle::LK_C,
+ guessLanguage("foo.h", "// clang-format Language: C\n"
+ "int i;"));
+ EXPECT_EQ(FormatStyle::LK_Cpp,
+ guessLanguage("foo.h", "// clang-format Language: Cpp\n"
+ "int DoStuff(CGRect rect);"));
+ EXPECT_EQ(FormatStyle::LK_ObjC,
+ guessLanguage("foo.h", "// clang-format Language: ObjC\n"
+ "int i;"));
+}
+
TEST_F(FormatTest, TypenameMacros) {
std::vector<std::string> TypenameMacros = {"STACK_OF", "LIST", "TAILQ_ENTRY"};
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index dffb07c89bacc..f1a6999cfdfb8 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -3646,6 +3646,11 @@ TEST_F(TokenAnnotatorTest, CppAltOperatorKeywords) {
ASSERT_EQ(Tokens.size(), 7u) << Tokens;
EXPECT_TOKEN(Tokens[3], tok::pipepipe, TT_BinaryOperator);
+ Tokens = annotate("return segment < *this or *this < segment;");
+ ASSERT_EQ(Tokens.size(), 12u) << Tokens;
+ EXPECT_TOKEN(Tokens[5], tok::pipepipe, TT_BinaryOperator);
+ EXPECT_TOKEN(Tokens[6], tok::star, TT_UnaryOperator);
+
Tokens = annotate("a = b or_eq c;");
ASSERT_EQ(Tokens.size(), 7u) << Tokens;
EXPECT_TOKEN(Tokens[3], tok::pipeequal, TT_BinaryOperator);
@@ -3658,11 +3663,13 @@ TEST_F(TokenAnnotatorTest, CppAltOperatorKeywords) {
ASSERT_EQ(Tokens.size(), 7u) << Tokens;
EXPECT_TOKEN(Tokens[3], tok::caretequal, TT_BinaryOperator);
- Tokens = annotate("xor = foo;");
+ const auto StyleC = getLLVMStyle(FormatStyle::LK_C);
+
+ Tokens = annotate("xor = foo;", StyleC);
ASSERT_EQ(Tokens.size(), 5u) << Tokens;
EXPECT_TOKEN(Tokens[0], tok::identifier, TT_Unknown);
- Tokens = annotate("int xor = foo;");
+ Tokens = annotate("int xor = foo;", StyleC);
ASSERT_EQ(Tokens.size(), 6u) << Tokens;
EXPECT_TOKEN(Tokens[1], tok::identifier, TT_StartOfName);
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/128996
More information about the llvm-branch-commits
mailing list