[llvm-branch-commits] [clang] release/20.x: [clang-format] Handle Java text blocks (#141334) (PR #141433)
Owen Pan via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sun May 25 16:56:12 PDT 2025
https://github.com/owenca updated https://github.com/llvm/llvm-project/pull/141433
From d32d17a56f6d53cebc6299c0474fc95abeb141ed Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano at gmail.com>
Date: Sun, 25 May 2025 15:40:45 -0700
Subject: [PATCH 1/2] [clang-format] Handle Java text blocks (#141334)
Fix #61954
(cherry picked from commit b7f5950bb3b97eac979925a3bbf015530c26962e)
---
clang/lib/Format/FormatTokenLexer.cpp | 32 +++++++++++++
clang/lib/Format/FormatTokenLexer.h | 2 +
clang/unittests/Format/FormatTestJava.cpp | 57 +++++++++++++++++++++++
3 files changed, 91 insertions(+)
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 16f0a76f3a954..d8ad3a1d52115 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -636,6 +636,36 @@ bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) {
return true;
}
+void FormatTokenLexer::tryParseJavaTextBlock() {
+ if (FormatTok->TokenText != "\"\"")
+ return;
+
+ const auto *S = Lex->getBufferLocation();
+ const auto *End = Lex->getBuffer().end();
+
+ if (S == End || *S != '\"')
+ return;
+
+ ++S; // Skip the `"""` that begins a text block.
+
+ // Find the `"""` that ends the text block.
+ for (int Count = 0; Count < 3 && S < End; ++S) {
+ switch (*S) {
+ case '\\':
+ Count = -1;
+ break;
+ case '\"':
+ ++Count;
+ break;
+ default:
+ Count = 0;
+ }
+ }
+
+ // Ignore the possibly invalid text block.
+ resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(S)));
+}
+
// Tries to parse a JavaScript Regex literal starting at the current token,
// if that begins with a slash and is in a location where JavaScript allows
// regex literals. Changes the current token to a regex literal and updates
@@ -1326,6 +1356,8 @@ FormatToken *FormatTokenLexer::getNextToken() {
FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
++Column;
StateStack.push(LexerState::TOKEN_STASHED);
+ } else if (Style.isJava() && FormatTok->is(tok::string_literal)) {
+ tryParseJavaTextBlock();
}
if (Style.isVerilog() && Tokens.size() > 0 &&
diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h
index 61474a3f9ada8..d9a25c8ef3538 100644
--- a/clang/lib/Format/FormatTokenLexer.h
+++ b/clang/lib/Format/FormatTokenLexer.h
@@ -71,6 +71,8 @@ class FormatTokenLexer {
bool canPrecedeRegexLiteral(FormatToken *Prev);
+ void tryParseJavaTextBlock();
+
// Tries to parse a JavaScript Regex literal starting at the current token,
// if that begins with a slash and is in a location where JavaScript allows
// regex literals. Changes the current token to a regex literal and updates
diff --git a/clang/unittests/Format/FormatTestJava.cpp b/clang/unittests/Format/FormatTestJava.cpp
index 33998bc7ff858..d0a3b4eb96d69 100644
--- a/clang/unittests/Format/FormatTestJava.cpp
+++ b/clang/unittests/Format/FormatTestJava.cpp
@@ -789,6 +789,63 @@ TEST_F(FormatTestJava, AlignCaseArrows) {
Style);
}
+TEST_F(FormatTestJava, TextBlock) {
+ verifyNoChange("String myStr = \"\"\"\n"
+ "hello\n"
+ "there\n"
+ "\"\"\";");
+
+ verifyNoChange("String tb = \"\"\"\n"
+ " the new\"\"\";");
+
+ verifyNoChange("System.out.println(\"\"\"\n"
+ " This is the first line\n"
+ " This is the second line\n"
+ " \"\"\");");
+
+ verifyNoChange("void writeHTML() {\n"
+ " String html = \"\"\" \n"
+ " <html>\n"
+ " <p>Hello World.</p>\n"
+ " </html>\n"
+ "\"\"\";\n"
+ " writeOutput(html);\n"
+ "}");
+
+ verifyNoChange("String colors = \"\"\"\t\n"
+ " red\n"
+ " green\n"
+ " blue\"\"\".indent(4);");
+
+ verifyNoChange("String code = \"\"\"\n"
+ " String source = \\\"\"\"\n"
+ " String message = \"Hello, World!\";\n"
+ " System.out.println(message);\n"
+ " \\\"\"\";\n"
+ " \"\"\";");
+
+ verifyNoChange(
+ "class Outer {\n"
+ " void printPoetry() {\n"
+ " String lilacs = \"\"\"\n"
+ "Passing the apple-tree blows of white and pink in the orchards\n"
+ "\"\"\";\n"
+ " System.out.println(lilacs);\n"
+ " }\n"
+ "}");
+
+ verifyNoChange("String name = \"\"\"\r\n"
+ " red\n"
+ " green\n"
+ " blue\\\n"
+ " \"\"\";");
+
+ verifyFormat("String name = \"\"\"Pat Q. Smith\"\"\";");
+
+ verifyNoChange("String name = \"\"\"\n"
+ " Pat Q. Smith");
+}
+
} // namespace
} // namespace test
} // namespace format
From def14516c2b0997656d6f87947217e6b5b6e9984 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano at gmail.com>
Date: Sun, 25 May 2025 16:56:04 -0700
Subject: [PATCH 2/2] Update FormatTokenLexer.cpp
---
clang/lib/Format/FormatTokenLexer.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index d8ad3a1d52115..9fdce7b4aa7fc 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -1356,8 +1356,8 @@ FormatToken *FormatTokenLexer::getNextToken() {
FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
++Column;
StateStack.push(LexerState::TOKEN_STASHED);
- } else if (Style.isJava() && FormatTok->is(tok::string_literal)) {
- tryParseJavaTextBlock();
+ } else if (Style.Language == FormatStyle::LK_Java &&
+ FormatTok->is(tok::string_literal)) {
}
if (Style.isVerilog() && Tokens.size() > 0 &&
More information about the llvm-branch-commits mailing list