[clang] [clang-format] Split line comments separated by backslashes (PR #147648)
Owen Pan via cfe-commits
cfe-commits at lists.llvm.org
Wed Jul 9 01:03:27 PDT 2025
https://github.com/owenca updated https://github.com/llvm/llvm-project/pull/147648
>From 3efb5ca11d67ad36c4cbb1ea78220a1ca69a8339 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano at gmail.com>
Date: Tue, 8 Jul 2025 21:16:12 -0700
Subject: [PATCH 1/2] [clang-format] Split line comments separated by
backslashes
Fixes #147341
---
clang/docs/ClangFormatStyleOptions.rst | 6 +++---
clang/include/clang/Format/Format.h | 6 +++---
clang/lib/Format/FormatTokenLexer.cpp | 20 ++++++++++---------
clang/unittests/Format/FormatTestComments.cpp | 18 ++++++++---------
clang/unittests/Format/TokenAnnotatorTest.cpp | 7 +++++++
5 files changed, 32 insertions(+), 25 deletions(-)
diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst
index c61c808831704..ab374c1886165 100644
--- a/clang/docs/ClangFormatStyleOptions.rst
+++ b/clang/docs/ClangFormatStyleOptions.rst
@@ -1554,9 +1554,9 @@ the configuration (without a prefix: ``Auto``).
.. code-block:: c++
- #define A \
- int aaaa; \
- int b; \
+ #define A \
+ int aaaa; \
+ int b; \
int dddddddddd;
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index 5d1fdb153b26e..74b516fe4f071 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -513,9 +513,9 @@ struct FormatStyle {
ENAS_LeftWithLastLine,
/// Align escaped newlines in the right-most column.
/// \code
- /// #define A \
- /// int aaaa; \
- /// int b; \
+ /// #define A \
+ /// int aaaa; \
+ /// int b; \
/// int dddddddddd;
/// \endcode
ENAS_Right,
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 40b62b2a993d8..91460ad5f4038 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -1336,16 +1336,18 @@ FormatToken *FormatTokenLexer::getNextToken() {
// finds comments that contain a backslash followed by a line break, truncates
// the comment token at the backslash, and resets the lexer to restart behind
// the backslash.
- if ((Style.isJavaScript() || Style.isJava()) && FormatTok->is(tok::comment) &&
- FormatTok->TokenText.starts_with("//")) {
- size_t BackslashPos = FormatTok->TokenText.find('\\');
- while (BackslashPos != StringRef::npos) {
- if (BackslashPos + 1 < FormatTok->TokenText.size() &&
- FormatTok->TokenText[BackslashPos + 1] == '\n') {
- truncateToken(BackslashPos + 1);
- break;
+ if ((Style.isCpp() || Style.isJavaScript() || Style.isJava()) &&
+ FormatTok->is(tok::comment)) {
+ if (const auto Text = FormatTok->TokenText; Text.starts_with("//")) {
+ for (auto Pos = Text.find('\\'); Pos++ != StringRef::npos;
+ Pos = Text.find('\\', Pos)) {
+ if (Pos < Text.size() && Text[Pos] == '\n' &&
+ (!Style.isCpp() ||
+ Text.substr(Pos + 1).ltrim().starts_with("//"))) {
+ truncateToken(Pos);
+ break;
+ }
}
- BackslashPos = FormatTok->TokenText.find('\\', BackslashPos + 1);
}
}
diff --git a/clang/unittests/Format/FormatTestComments.cpp b/clang/unittests/Format/FormatTestComments.cpp
index a16fbffb76270..88707551b7698 100644
--- a/clang/unittests/Format/FormatTestComments.cpp
+++ b/clang/unittests/Format/FormatTestComments.cpp
@@ -747,16 +747,14 @@ TEST_F(FormatTestComments, DontSplitLineCommentsWithEscapedNewlines) {
" // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n"
" // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
getLLVMStyleWithColumns(50)));
- // FIXME: One day we might want to implement adjustment of leading whitespace
- // of the consecutive lines in this kind of comment:
- EXPECT_EQ("double\n"
- " a; // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n"
- " // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n"
- " // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
- format("double a; // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n"
- " // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n"
- " // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
- getLLVMStyleWithColumns(49)));
+ verifyFormat("double\n"
+ " a; // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n"
+ " // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n"
+ " // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
+ "double a; // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n"
+ " // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\\\n"
+ " // AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
+ getLLVMStyleWithColumns(49));
}
TEST_F(FormatTestComments, DontIntroduceMultilineComments) {
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index a1285e4bc9bf8..e281a4945a862 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -4126,6 +4126,13 @@ TEST_F(TokenAnnotatorTest, JsonCodeInRawString) {
EXPECT_TOKEN(Tokens[6], tok::colon, TT_DictLiteral);
}
+TEST_F(TokenAnnotatorTest, LineCommentTrailingBackslash) {
+ auto Tokens = annotate("// a \\\n"
+ "// b");
+ ASSERT_EQ(Tokens.size(), 3u) << Tokens;
+ EXPECT_TOKEN(Tokens[1], tok::comment, TT_LineComment);
+}
+
} // namespace
} // namespace format
} // namespace clang
>From fd146e58440518be6802104439293f743ba2079f Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano at gmail.com>
Date: Wed, 9 Jul 2025 00:45:07 -0700
Subject: [PATCH 2/2] NFC
---
clang/lib/Format/FormatTokenLexer.cpp | 25 +++++++++++++------------
1 file changed, 13 insertions(+), 12 deletions(-)
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 91460ad5f4038..d8ee5cb90aaa4 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -1329,6 +1329,8 @@ FormatToken *FormatTokenLexer::getNextToken() {
if (FormatTok->is(tok::unknown))
FormatTok->setType(TT_ImplicitStringLiteral);
+ const bool IsCpp = Style.isCpp();
+
// JavaScript and Java do not allow to escape the end of the line with a
// backslash. Backslashes are syntax errors in plain source, but can occur in
// comments. When a single line comment ends with a \, it'll cause the next
@@ -1336,17 +1338,16 @@ FormatToken *FormatTokenLexer::getNextToken() {
// finds comments that contain a backslash followed by a line break, truncates
// the comment token at the backslash, and resets the lexer to restart behind
// the backslash.
- if ((Style.isCpp() || Style.isJavaScript() || Style.isJava()) &&
- FormatTok->is(tok::comment)) {
- if (const auto Text = FormatTok->TokenText; Text.starts_with("//")) {
- for (auto Pos = Text.find('\\'); Pos++ != StringRef::npos;
- Pos = Text.find('\\', Pos)) {
- if (Pos < Text.size() && Text[Pos] == '\n' &&
- (!Style.isCpp() ||
- Text.substr(Pos + 1).ltrim().starts_with("//"))) {
- truncateToken(Pos);
- break;
- }
+ if (const auto Text = FormatTok->TokenText;
+ Text.starts_with("//") &&
+ (IsCpp || Style.isJavaScript() || Style.isJava())) {
+ assert(FormatTok->is(tok::comment));
+ for (auto Pos = Text.find('\\'); Pos++ != StringRef::npos;
+ Pos = Text.find('\\', Pos)) {
+ if (Pos < Text.size() && Text[Pos] == '\n' &&
+ (!IsCpp || Text.substr(Pos + 1).ltrim().starts_with("//"))) {
+ truncateToken(Pos);
+ break;
}
}
}
@@ -1452,7 +1453,7 @@ FormatToken *FormatTokenLexer::getNextToken() {
Column = FormatTok->LastLineColumnWidth;
}
- if (Style.isCpp()) {
+ if (IsCpp) {
auto *Identifier = FormatTok->Tok.getIdentifierInfo();
auto it = Macros.find(Identifier);
if ((Tokens.empty() || !Tokens.back()->Tok.getIdentifierInfo() ||
More information about the cfe-commits
mailing list