[clang] [clang-format] Handle Trailing Whitespace After Line Continuation (P2223R2) (PR #145243)
Naveen Seth Hanig via cfe-commits
cfe-commits at lists.llvm.org
Sun Jun 22 11:26:48 PDT 2025
https://github.com/naveen-seth updated https://github.com/llvm/llvm-project/pull/145243
>From 8cee6b33c54aca0ea69b3ad291287464523a3d7a Mon Sep 17 00:00:00 2001
From: Naveen Seth Hanig <naveen.hanig at outlook.com>
Date: Sun, 22 Jun 2025 18:06:07 +0200
Subject: [PATCH 1/2] [clang-format] Handle Trailing Whitespace After Line
Continuation (P2223R2)
Fixes #145226.
Implement P2223R2 in clang-format to correctly handle cases where a
backslash '\' is followed by trailing whitespace before the newline.
Previously, clang-format failed to properly detect and
handle such cases, leading to misformatted code.
With this, clang-format matches the behavior already implemented in
Clang's lexer and DependencyDirectivesScanner.cpp, which allow
trailing whitespace after a line continuation in any C++ standard.
---
clang/docs/ReleaseNotes.rst | 3 ++
clang/lib/Format/FormatTokenLexer.cpp | 31 ++++++++++++++-----
.../splice-trailing-whitespace-p2223r2.cpp | 14 +++++++++
3 files changed, 40 insertions(+), 8 deletions(-)
create mode 100644 clang/test/Format/splice-trailing-whitespace-p2223r2.cpp
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 96477ef6ddc9a..7c8e231655e86 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1025,6 +1025,9 @@ clang-format
``enum`` enumerator lists.
- Add ``OneLineFormatOffRegex`` option for turning formatting off for one line.
- Add ``SpaceAfterOperatorKeyword`` option.
+- Support trailing whitespace in line splicing.
+ (`P2223R2 <https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2021/p2223r2.pdf>`_, #GH145226)
+
clang-refactor
--------------
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 4cc4f5f22db0d..bcc8e6ffd91ab 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -14,6 +14,7 @@
#include "FormatTokenLexer.h"
#include "FormatToken.h"
+#include "clang/Basic/CharInfo.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Format/Format.h"
@@ -1205,14 +1206,23 @@ static size_t countLeadingWhitespace(StringRef Text) {
while (Cur < End) {
if (isspace(Cur[0])) {
++Cur;
- } else if (Cur[0] == '\\' && (Cur[1] == '\n' || Cur[1] == '\r')) {
- // A '\' followed by a newline always escapes the newline, regardless
- // of whether there is another '\' before it.
+ } else if (Cur[0] == '\\') {
+ // A '\' followed by optional horizontal whitespace (P2223R2) and then a
+ // newline always escapes the newline, regardless of whether there is
+ // another '\' before it.
// The source has a null byte at the end. So the end of the entire input
// isn't reached yet. Also the lexer doesn't break apart an escaped
// newline.
- assert(End - Cur >= 2);
- Cur += 2;
+ const unsigned char *Lookahead = Cur + 1;
+ while (isHorizontalWhitespace(*Lookahead))
+ ++Lookahead;
+ if (*Lookahead == '\n' || *Lookahead == '\r') {
+ // Splice found, consume it.
+ Cur = Lookahead + 1;
+ continue;
+ }
+ // No line splice found; the '\' is a token.
+ break;
} else if (Cur[0] == '?' && Cur[1] == '?' && Cur[2] == '/' &&
(Cur[3] == '\n' || Cur[3] == '\r')) {
// Newlines can also be escaped by a '?' '?' '/' trigraph. By the way, the
@@ -1295,13 +1305,18 @@ FormatToken *FormatTokenLexer::getNextToken() {
case '/':
// The text was entirely whitespace when this loop was entered. Thus
// this has to be an escape sequence.
- assert(Text.substr(i, 2) == "\\\r" || Text.substr(i, 2) == "\\\n" ||
- Text.substr(i, 4) == "\?\?/\r" ||
+ assert(Text.substr(i, 4) == "\?\?/\r" ||
Text.substr(i, 4) == "\?\?/\n" ||
(i >= 1 && (Text.substr(i - 1, 4) == "\?\?/\r" ||
Text.substr(i - 1, 4) == "\?\?/\n")) ||
(i >= 2 && (Text.substr(i - 2, 4) == "\?\?/\r" ||
- Text.substr(i - 2, 4) == "\?\?/\n")));
+ Text.substr(i - 2, 4) == "\?\?/\n")) ||
+ (Text[i] == '\\' && [&]() -> bool {
+ size_t j = i + 1;
+ while (j < Text.size() && isHorizontalWhitespace(Text[j]))
+ ++j;
+ return j < Text.size() && (Text[j] == '\n' || Text[j] == '\r');
+ }()));
InEscape = true;
break;
default:
diff --git a/clang/test/Format/splice-trailing-whitespace-p2223r2.cpp b/clang/test/Format/splice-trailing-whitespace-p2223r2.cpp
new file mode 100644
index 0000000000000..3cc08cc631965
--- /dev/null
+++ b/clang/test/Format/splice-trailing-whitespace-p2223r2.cpp
@@ -0,0 +1,14 @@
+// RUN: grep -Ev "// *[A-Z-]+:" %s \
+// RUN: | clang-format -style='{BasedOnStyle: LLVM, AlignEscapedNewlines: DontAlign}' \
+// RUN: | FileCheck -strict-whitespace %s
+
+// CHECK: {{^#define TAG\(\.\.\.\) \\}}
+// CHECK: {{^ struct a \{\};}}
+// There is whitespace following v this backslash!
+#define TAG(...) struct a { \
+ };
+
+// CHECK: {{^int i;}}
+// The comment below eats its following line because of the line splice.
+// I also have trailing whitespace. Nom nom nom \
+int i;
>From 26d525e3ff75885390f5d46ccb5c044fdbd8ddde Mon Sep 17 00:00:00 2001
From: Naveen Seth Hanig <naveen.hanig at outlook.com>
Date: Sun, 22 Jun 2025 20:23:51 +0200
Subject: [PATCH 2/2] Remove deprecated usage of grep from regression test
---
clang/test/Format/splice-trailing-whitespace-p2223r2.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/clang/test/Format/splice-trailing-whitespace-p2223r2.cpp b/clang/test/Format/splice-trailing-whitespace-p2223r2.cpp
index 3cc08cc631965..4e9e2612af78a 100644
--- a/clang/test/Format/splice-trailing-whitespace-p2223r2.cpp
+++ b/clang/test/Format/splice-trailing-whitespace-p2223r2.cpp
@@ -1,5 +1,4 @@
-// RUN: grep -Ev "// *[A-Z-]+:" %s \
-// RUN: | clang-format -style='{BasedOnStyle: LLVM, AlignEscapedNewlines: DontAlign}' \
+// RUN: clang-format -style='{BasedOnStyle: LLVM, AlignEscapedNewlines: DontAlign}' %s \
// RUN: | FileCheck -strict-whitespace %s
// CHECK: {{^#define TAG\(\.\.\.\) \\}}
More information about the cfe-commits
mailing list