[clang] 9d212e8 - [clang-format] Handle quotes and escaped braces in C# interpolated strings

Jonathan Coe via cfe-commits cfe-commits at lists.llvm.org
Tue Jan 28 07:07:49 PST 2020


Author: Jonathan Coe
Date: 2020-01-28T14:46:27Z
New Revision: 9d212e83e920363762eb265293adf0bd6fda5a13

URL: https://github.com/llvm/llvm-project/commit/9d212e83e920363762eb265293adf0bd6fda5a13
DIFF: https://github.com/llvm/llvm-project/commit/9d212e83e920363762eb265293adf0bd6fda5a13.diff

LOG: [clang-format] Handle quotes and escaped braces in C# interpolated strings

Summary:
This addresses issues raised in https://bugs.llvm.org/show_bug.cgi?id=44454.

There are outstanding issues with multi-line verbatim strings in C# that will be addressed in a follow-up PR.

Reviewers: krasimir, MyDeveloperDay

Reviewed By: krasimir, MyDeveloperDay

Subscribers: MyDeveloperDay

Tags: #clang-format

Differential Revision: https://reviews.llvm.org/D73492

Added: 
    

Modified: 
    clang/lib/Format/FormatTokenLexer.cpp
    clang/lib/Format/FormatTokenLexer.h
    clang/unittests/Format/FormatTestCSharp.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index d8dfe17fb89c..ba0bbf68f12f 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -74,7 +74,7 @@ void FormatTokenLexer::tryMergePreviousTokens() {
   if (Style.isCSharp()) {
     if (tryMergeCSharpKeywordVariables())
       return;
-    if (tryMergeCSharpVerbatimStringLiteral())
+    if (tryMergeCSharpStringLiteral())
       return;
     if (tryMergeCSharpDoubleQuestion())
       return;
@@ -181,18 +181,68 @@ bool FormatTokenLexer::tryMergeJSPrivateIdentifier() {
 // Search for verbatim or interpolated string literals @"ABC" or
 // $"aaaaa{abc}aaaaa" i and mark the token as TT_CSharpStringLiteral, and to
 // prevent splitting of @, $ and ".
-bool FormatTokenLexer::tryMergeCSharpVerbatimStringLiteral() {
+bool FormatTokenLexer::tryMergeCSharpStringLiteral() {
   if (Tokens.size() < 2)
     return false;
 
-  auto &String = *(Tokens.end() - 1);
-  if (!String->is(tok::string_literal))
-    return false;
+  auto &CSharpStringLiteral = *(Tokens.end() - 2);
+
+  // Interpolated strings could contain { } with " characters inside.
+  // $"{x ?? "null"}"
+  // should not be split into $"{x ?? ", null, "}" but should treated as a
+  // single string-literal.
+  //
+  // We opt not to try and format expressions inside {} within a C#
+  // interpolated string. Formatting expressions within an interpolated string
+  // would require similar work as that done for JavaScript template strings
+  // in `handleTemplateStrings()`.
+  auto &CSharpInterpolatedString = *(Tokens.end() - 2);
+  if (CSharpInterpolatedString->Type == TT_CSharpStringLiteral &&
+      (CSharpInterpolatedString->TokenText.startswith(R"($")") ||
+       CSharpInterpolatedString->TokenText.startswith(R"($@")"))) {
+    int UnmatchedOpeningBraceCount = 0;
+
+    auto TokenTextSize = CSharpInterpolatedString->TokenText.size();
+    for (size_t Index = 0; Index < TokenTextSize; ++Index) {
+      char C = CSharpInterpolatedString->TokenText[Index];
+      if (C == '{') {
+        // "{{"  inside an interpolated string is an escaped '{' so skip it.
+        if (Index + 1 < TokenTextSize &&
+            CSharpInterpolatedString->TokenText[Index + 1] == '{') {
+          ++Index;
+          continue;
+        }
+        ++UnmatchedOpeningBraceCount;
+      } else if (C == '}') {
+        // "}}"  inside an interpolated string is an escaped '}' so skip it.
+        if (Index + 1 < TokenTextSize &&
+            CSharpInterpolatedString->TokenText[Index + 1] == '}') {
+          ++Index;
+          continue;
+        }
+        --UnmatchedOpeningBraceCount;
+      }
+    }
+
+    if (UnmatchedOpeningBraceCount > 0) {
+      auto &NextToken = *(Tokens.end() - 1);
+      CSharpInterpolatedString->TokenText =
+          StringRef(CSharpInterpolatedString->TokenText.begin(),
+                    NextToken->TokenText.end() -
+                        CSharpInterpolatedString->TokenText.begin());
+      CSharpInterpolatedString->ColumnWidth += NextToken->ColumnWidth;
+      Tokens.erase(Tokens.end() - 1);
+      return true;
+    }
+  }
 
   // verbatim strings could contain "" which C# sees as an escaped ".
   // @"""Hello""" will have been tokenized as @"" "Hello" "" and needs
   // merging into a single string literal.
-  auto &CSharpStringLiteral = *(Tokens.end() - 2);
+  auto &String = *(Tokens.end() - 1);
+  if (!String->is(tok::string_literal))
+    return false;
+
   if (CSharpStringLiteral->Type == TT_CSharpStringLiteral &&
       (CSharpStringLiteral->TokenText.startswith(R"(@")") ||
        CSharpStringLiteral->TokenText.startswith(R"($@")"))) {

diff  --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h
index 611211be055a..053b759d2440 100644
--- a/clang/lib/Format/FormatTokenLexer.h
+++ b/clang/lib/Format/FormatTokenLexer.h
@@ -49,7 +49,7 @@ class FormatTokenLexer {
   bool tryMergeLessLess();
   bool tryMergeNSStringLiteral();
   bool tryMergeJSPrivateIdentifier();
-  bool tryMergeCSharpVerbatimStringLiteral();
+  bool tryMergeCSharpStringLiteral();
   bool tryMergeCSharpKeywordVariables();
   bool tryMergeCSharpNullConditionals();
   bool tryMergeCSharpDoubleQuestion();

diff  --git a/clang/unittests/Format/FormatTestCSharp.cpp b/clang/unittests/Format/FormatTestCSharp.cpp
index f777d319ea40..222745f219f0 100644
--- a/clang/unittests/Format/FormatTestCSharp.cpp
+++ b/clang/unittests/Format/FormatTestCSharp.cpp
@@ -417,5 +417,13 @@ TEST_F(FormatTestCSharp, CSharpEscapedQuotesInVerbatimStrings) {
   verifyFormat(R"(string str = $@"""Hello {friend}""")", Style);
 }
 
+TEST_F(FormatTestCSharp, CSharpQuotesInInterpolatedStrings) {
+  FormatStyle Style = getGoogleStyle(FormatStyle::LK_CSharp);
+
+  verifyFormat(R"(string str1 = $"{null ?? "null"}";)", Style);
+  verifyFormat(R"(string str2 = $"{{{braceCount} braces";)", Style);
+  verifyFormat(R"(string str3 = $"{braceCount}}} braces";)", Style);
+}
+
 } // namespace format
 } // end namespace clang


        


More information about the cfe-commits mailing list