[clang] dd47b84 - [clang-format] Handle Trailing Whitespace After Line Continuation (P2223R2) (#145243)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Jun 25 09:13:03 PDT 2025
Author: Naveen Seth Hanig
Date: 2025-06-25T18:13:00+02:00
New Revision: dd47b845a62cdaf4a1b0aba354cd80a4eabd9570
URL: https://github.com/llvm/llvm-project/commit/dd47b845a62cdaf4a1b0aba354cd80a4eabd9570
DIFF: https://github.com/llvm/llvm-project/commit/dd47b845a62cdaf4a1b0aba354cd80a4eabd9570.diff
LOG: [clang-format] Handle Trailing Whitespace After Line Continuation (P2223R2) (#145243)
Fixes #145226.
Implement
[P2223R2](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2021/p2223r2.pdf)
in clang-format to correctly handle cases where a single backslash ('\') is
followed by trailing whitespace before the newline.
Previously, `clang-format` failed to properly detect and handle such
cases, leading to misformatted code.
With this, `clang-format` matches the behavior already implemented in
Clang's lexer and `DependencyDirectivesScanner.cpp`, which allow
trailing whitespace after a line continuation in any C++ standard.
Added:
Modified:
clang/lib/Format/FormatTokenLexer.cpp
clang/unittests/Format/FormatTest.cpp
Removed:
################################################################################
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 4cc4f5f22db0d..06f68ec8b0fc1 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -14,6 +14,7 @@
#include "FormatTokenLexer.h"
#include "FormatToken.h"
+#include "clang/Basic/CharInfo.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Format/Format.h"
@@ -1203,16 +1204,22 @@ static size_t countLeadingWhitespace(StringRef Text) {
const unsigned char *const End = Text.bytes_end();
const unsigned char *Cur = Begin;
while (Cur < End) {
- if (isspace(Cur[0])) {
+ if (isWhitespace(Cur[0])) {
++Cur;
- } else if (Cur[0] == '\\' && (Cur[1] == '\n' || Cur[1] == '\r')) {
- // A '\' followed by a newline always escapes the newline, regardless
- // of whether there is another '\' before it.
+ } else if (Cur[0] == '\\') {
+ // A backslash followed by optional horizontal whitespace (P2223R2) and
+ // then a newline always escapes the newline.
// The source has a null byte at the end. So the end of the entire input
// isn't reached yet. Also the lexer doesn't break apart an escaped
// newline.
- assert(End - Cur >= 2);
- Cur += 2;
+ const auto *Lookahead = Cur + 1;
+ while (isHorizontalWhitespace(*Lookahead))
+ ++Lookahead;
+ // No line splice found; the backslash is a token.
+ if (!isVerticalWhitespace(*Lookahead))
+ break;
+ // Splice found, consume it.
+ Cur = Lookahead + 1;
} else if (Cur[0] == '?' && Cur[1] == '?' && Cur[2] == '/' &&
(Cur[3] == '\n' || Cur[3] == '\r')) {
// Newlines can also be escaped by a '?' '?' '/' trigraph. By the way, the
@@ -1295,13 +1302,18 @@ FormatToken *FormatTokenLexer::getNextToken() {
case '/':
// The text was entirely whitespace when this loop was entered. Thus
// this has to be an escape sequence.
- assert(Text.substr(i, 2) == "\\\r" || Text.substr(i, 2) == "\\\n" ||
- Text.substr(i, 4) == "\?\?/\r" ||
+ assert(Text.substr(i, 4) == "\?\?/\r" ||
Text.substr(i, 4) == "\?\?/\n" ||
(i >= 1 && (Text.substr(i - 1, 4) == "\?\?/\r" ||
Text.substr(i - 1, 4) == "\?\?/\n")) ||
(i >= 2 && (Text.substr(i - 2, 4) == "\?\?/\r" ||
- Text.substr(i - 2, 4) == "\?\?/\n")));
+ Text.substr(i - 2, 4) == "\?\?/\n")) ||
+ (Text[i] == '\\' && [&]() -> bool {
+ size_t j = i + 1;
+ while (j < Text.size() && isHorizontalWhitespace(Text[j]))
+ ++j;
+ return j < Text.size() && (Text[j] == '\n' || Text[j] == '\r');
+ }()));
InEscape = true;
break;
default:
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index c0633ba3c29b3..a05bf8305716b 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -25768,6 +25768,21 @@ TEST_F(FormatTest, OperatorPassedAsAFunctionPtr) {
verifyFormat("foo(operator, , -42);", Style);
}
+TEST_F(FormatTest, LineSpliceWithTrailingWhitespace) {
+ auto Style = getLLVMStyle();
+ Style.AlignEscapedNewlines = FormatStyle::ENAS_DontAlign;
+ Style.UseTab = FormatStyle::UT_Never;
+
+ verifyFormat("int i;", " \\ \n"
+ " int i;");
+ verifyFormat("#define FOO(args) \\\n"
+ " struct a {};",
+ "#define FOO( args ) \\ \n"
+ "struct a{\\\t\t\t\n"
+ " };",
+ Style);
+}
+
TEST_F(FormatTest, WhitespaceSensitiveMacros) {
FormatStyle Style = getLLVMStyle();
Style.WhitespaceSensitiveMacros.push_back("FOO");
More information about the cfe-commits
mailing list