[clang] 9d212e8 - [clang-format] Handle quotes and escaped braces in C# interpolated strings
Jonathan Coe via cfe-commits
cfe-commits at lists.llvm.org
Tue Jan 28 07:07:49 PST 2020
Author: Jonathan Coe
Date: 2020-01-28T14:46:27Z
New Revision: 9d212e83e920363762eb265293adf0bd6fda5a13
URL: https://github.com/llvm/llvm-project/commit/9d212e83e920363762eb265293adf0bd6fda5a13
DIFF: https://github.com/llvm/llvm-project/commit/9d212e83e920363762eb265293adf0bd6fda5a13.diff
LOG: [clang-format] Handle quotes and escaped braces in C# interpolated strings
Summary:
This addresses issues raised in https://bugs.llvm.org/show_bug.cgi?id=44454.
There are outstanding issues with multi-line verbatim strings in C# that will be addressed in a follow-up PR.
Reviewers: krasimir, MyDeveloperDay
Reviewed By: krasimir, MyDeveloperDay
Subscribers: MyDeveloperDay
Tags: #clang-format
Differential Revision: https://reviews.llvm.org/D73492
Added:
Modified:
clang/lib/Format/FormatTokenLexer.cpp
clang/lib/Format/FormatTokenLexer.h
clang/unittests/Format/FormatTestCSharp.cpp
Removed:
################################################################################
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index d8dfe17fb89c..ba0bbf68f12f 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -74,7 +74,7 @@ void FormatTokenLexer::tryMergePreviousTokens() {
if (Style.isCSharp()) {
if (tryMergeCSharpKeywordVariables())
return;
- if (tryMergeCSharpVerbatimStringLiteral())
+ if (tryMergeCSharpStringLiteral())
return;
if (tryMergeCSharpDoubleQuestion())
return;
@@ -181,18 +181,68 @@ bool FormatTokenLexer::tryMergeJSPrivateIdentifier() {
// Search for verbatim or interpolated string literals @"ABC" or
// $"aaaaa{abc}aaaaa" and mark the token as TT_CSharpStringLiteral, to
// prevent splitting of @, $ and ".
-bool FormatTokenLexer::tryMergeCSharpVerbatimStringLiteral() {
+bool FormatTokenLexer::tryMergeCSharpStringLiteral() {
if (Tokens.size() < 2)
return false;
- auto &String = *(Tokens.end() - 1);
- if (!String->is(tok::string_literal))
- return false;
+ auto &CSharpStringLiteral = *(Tokens.end() - 2);
+
+ // Interpolated strings could contain { } with " characters inside.
+ // $"{x ?? "null"}"
+ // should not be split into $"{x ?? ", null, "}" but should be treated as a
+ // single string-literal.
+ //
+ // We opt not to try and format expressions inside {} within a C#
+ // interpolated string. Formatting expressions within an interpolated string
+ // would require similar work as that done for JavaScript template strings
+ // in `handleTemplateStrings()`.
+ auto &CSharpInterpolatedString = *(Tokens.end() - 2);
+ if (CSharpInterpolatedString->Type == TT_CSharpStringLiteral &&
+ (CSharpInterpolatedString->TokenText.startswith(R"($")") ||
+ CSharpInterpolatedString->TokenText.startswith(R"($@")"))) {
+ int UnmatchedOpeningBraceCount = 0;
+
+ auto TokenTextSize = CSharpInterpolatedString->TokenText.size();
+ for (size_t Index = 0; Index < TokenTextSize; ++Index) {
+ char C = CSharpInterpolatedString->TokenText[Index];
+ if (C == '{') {
+ // "{{" inside an interpolated string is an escaped '{' so skip it.
+ if (Index + 1 < TokenTextSize &&
+ CSharpInterpolatedString->TokenText[Index + 1] == '{') {
+ ++Index;
+ continue;
+ }
+ ++UnmatchedOpeningBraceCount;
+ } else if (C == '}') {
+ // "}}" inside an interpolated string is an escaped '}' so skip it.
+ if (Index + 1 < TokenTextSize &&
+ CSharpInterpolatedString->TokenText[Index + 1] == '}') {
+ ++Index;
+ continue;
+ }
+ --UnmatchedOpeningBraceCount;
+ }
+ }
+
+ if (UnmatchedOpeningBraceCount > 0) {
+ auto &NextToken = *(Tokens.end() - 1);
+ CSharpInterpolatedString->TokenText =
+ StringRef(CSharpInterpolatedString->TokenText.begin(),
+ NextToken->TokenText.end() -
+ CSharpInterpolatedString->TokenText.begin());
+ CSharpInterpolatedString->ColumnWidth += NextToken->ColumnWidth;
+ Tokens.erase(Tokens.end() - 1);
+ return true;
+ }
+ }
// verbatim strings could contain "" which C# sees as an escaped ".
// @"""Hello""" will have been tokenized as @"" "Hello" "" and needs
// merging into a single string literal.
- auto &CSharpStringLiteral = *(Tokens.end() - 2);
+ auto &String = *(Tokens.end() - 1);
+ if (!String->is(tok::string_literal))
+ return false;
+
if (CSharpStringLiteral->Type == TT_CSharpStringLiteral &&
(CSharpStringLiteral->TokenText.startswith(R"(@")") ||
CSharpStringLiteral->TokenText.startswith(R"($@")"))) {
diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h
index 611211be055a..053b759d2440 100644
--- a/clang/lib/Format/FormatTokenLexer.h
+++ b/clang/lib/Format/FormatTokenLexer.h
@@ -49,7 +49,7 @@ class FormatTokenLexer {
bool tryMergeLessLess();
bool tryMergeNSStringLiteral();
bool tryMergeJSPrivateIdentifier();
- bool tryMergeCSharpVerbatimStringLiteral();
+ bool tryMergeCSharpStringLiteral();
bool tryMergeCSharpKeywordVariables();
bool tryMergeCSharpNullConditionals();
bool tryMergeCSharpDoubleQuestion();
diff --git a/clang/unittests/Format/FormatTestCSharp.cpp b/clang/unittests/Format/FormatTestCSharp.cpp
index f777d319ea40..222745f219f0 100644
--- a/clang/unittests/Format/FormatTestCSharp.cpp
+++ b/clang/unittests/Format/FormatTestCSharp.cpp
@@ -417,5 +417,13 @@ TEST_F(FormatTestCSharp, CSharpEscapedQuotesInVerbatimStrings) {
verifyFormat(R"(string str = $@"""Hello {friend}""")", Style);
}
+TEST_F(FormatTestCSharp, CSharpQuotesInInterpolatedStrings) {
+ FormatStyle Style = getGoogleStyle(FormatStyle::LK_CSharp);
+
+ verifyFormat(R"(string str1 = $"{null ?? "null"}";)", Style);
+ verifyFormat(R"(string str2 = $"{{{braceCount} braces";)", Style);
+ verifyFormat(R"(string str3 = $"{braceCount}}} braces";)", Style);
+}
+
} // namespace format
} // end namespace clang
More information about the cfe-commits
mailing list