[clang] [clang-format] Break after string literals with trailing line breaks (PR #76795)

kadir çetinkaya via cfe-commits cfe-commits at lists.llvm.org
Fri Jan 5 07:28:51 PST 2024


https://github.com/kadircet updated https://github.com/llvm/llvm-project/pull/76795

>From 18ef1d8901835f5129f775d292425b808f42fe85 Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya <kadircet at google.com>
Date: Wed, 3 Jan 2024 09:28:35 +0100
Subject: [PATCH 1/4] [clang-format] Break after string literals with trailing
 line breaks

This restores a subset of functionality that was forego in
d68826dfbd987377ef6771d40c1d984f09ee3b9e.

Streaming multiple string literals is rare enough in practice, hence
that change makes sense in general. But it seems people were
incidentally relying on this for having line breaks after string
literals that ended with `\n`.

This patch tries to restore that behavior to prevent regressions in the
upcoming LLVM release, until we can implement some configuration based
approach as proposed in https://github.com/llvm/llvm-project/pull/69859.
---
 clang/lib/Format/TokenAnnotator.cpp           |  8 ++++++++
 clang/unittests/Format/TokenAnnotatorTest.cpp | 10 ++++++++++
 2 files changed, 18 insertions(+)

diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 3ac3aa3c5e3a22..27c6bb0ef6fe4f 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -5151,6 +5151,14 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
     return true;
   if (Left.IsUnterminatedLiteral)
     return true;
+  if (Right.is(tok::lessless) && Right.Next && Left.is(tok::string_literal) &&
+      // FIXME: Breaking after newlines seems useful in general. Turn this into
+      // an option and Recognize more cases like endl etc, and break independent
+      // of what comes after operator lessless.
+      Right.Next->is(tok::string_literal) &&
+      Left.TokenText.ends_with("\\n\"")) {
+    return true;
+  }
   if (Right.is(TT_RequiresClause)) {
     switch (Style.RequiresClausePosition) {
     case FormatStyle::RCPS_OwnLine:
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 2cafc0438ffb46..cd3ffb611839d2 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -10,6 +10,7 @@
 
 #include "FormatTestUtils.h"
 #include "TestLexer.h"
+#include "clang/Basic/TokenKinds.h"
 #include "gtest/gtest.h"
 
 namespace clang {
@@ -2499,6 +2500,15 @@ TEST_F(TokenAnnotatorTest, BraceKind) {
   EXPECT_BRACE_KIND(Tokens[6], BK_Block);
 }
 
+TEST_F(TokenAnnotatorTest, StreamOperator) {
+  auto Tokens = annotate("\"foo\\n\" << aux << \"foo\\n\" << \"foo\";");
+  ASSERT_EQ(Tokens.size(), 9u) << Tokens;
+  EXPECT_FALSE(Tokens[1]->MustBreakBefore);
+  EXPECT_FALSE(Tokens[3]->MustBreakBefore);
+  // Only break between string literals if the former ends with \n.
+  EXPECT_TRUE(Tokens[5]->MustBreakBefore);
+}
+
 } // namespace
 } // namespace format
 } // namespace clang

>From ecc4284ce3d174944a09189d7ff3570df4ccbe70 Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya <kadircet at google.com>
Date: Thu, 4 Jan 2024 08:16:17 +0100
Subject: [PATCH 2/4] Add formatting test & drop extra include

---
 clang/unittests/Format/FormatTest.cpp         | 2 ++
 clang/unittests/Format/TokenAnnotatorTest.cpp | 1 -
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 881993ede17c3d..25ef5c680af862 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -26708,6 +26708,8 @@ TEST_F(FormatTest, PPBranchesInBracedInit) {
 
 TEST_F(FormatTest, StreamOutputOperator) {
   verifyFormat("std::cout << \"foo\" << \"bar\" << baz;");
+  verifyFormat("std::cout << \"foo\\n\"\n"
+               "          << \"bar\";");
 }
 
 TEST_F(FormatTest, BreakAdjacentStringLiterals) {
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index cd3ffb611839d2..decc0785c5cde7 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -10,7 +10,6 @@
 
 #include "FormatTestUtils.h"
 #include "TestLexer.h"
-#include "clang/Basic/TokenKinds.h"
 #include "gtest/gtest.h"
 
 namespace clang {

>From d77d94f28c7553834c6372aee0a43946b53dbcf2 Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya <kadircet at google.com>
Date: Fri, 5 Jan 2024 08:48:35 +0100
Subject: [PATCH 3/4] Move comment around

---
 clang/lib/Format/TokenAnnotator.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 27c6bb0ef6fe4f..695eed7412d877 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -5151,10 +5151,10 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
     return true;
   if (Left.IsUnterminatedLiteral)
     return true;
+  // FIXME: Breaking after newlines seems useful in general. Turn this into an
+  // option and Recognize more cases like endl etc, and break independent of
+  // what comes after operator lessless.
   if (Right.is(tok::lessless) && Right.Next && Left.is(tok::string_literal) &&
-      // FIXME: Breaking after newlines seems useful in general. Turn this into
-      // an option and Recognize more cases like endl etc, and break independent
-      // of what comes after operator lessless.
       Right.Next->is(tok::string_literal) &&
       Left.TokenText.ends_with("\\n\"")) {
     return true;

>From a2a3edcd15d331ef51c7c5a93fbf3bdd6012e724 Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya <kadircet at google.com>
Date: Fri, 5 Jan 2024 16:27:10 +0100
Subject: [PATCH 4/4] Fix capitalization in comment and re-order conditions

---
 clang/lib/Format/TokenAnnotator.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 695eed7412d877..8b43438c72dfe1 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -5152,10 +5152,10 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
   if (Left.IsUnterminatedLiteral)
     return true;
   // FIXME: Breaking after newlines seems useful in general. Turn this into an
-  // option and Recognize more cases like endl etc, and break independent of
+  // option and recognize more cases like endl etc, and break independent of
   // what comes after operator lessless.
-  if (Right.is(tok::lessless) && Right.Next && Left.is(tok::string_literal) &&
-      Right.Next->is(tok::string_literal) &&
+  if (Right.is(tok::lessless) && Right.Next &&
+      Right.Next->is(tok::string_literal) && Left.is(tok::string_literal) &&
       Left.TokenText.ends_with("\\n\"")) {
     return true;
   }



More information about the cfe-commits mailing list