[clang] f9f0919 - [clang-format] Improve support for multiline C# strings
Jonathan Coe via cfe-commits
cfe-commits at lists.llvm.org
Thu Jan 30 05:46:12 PST 2020
Author: Jonathan Coe
Date: 2020-01-30T13:45:48Z
New Revision: f9f0919db7ea033a205c87eb08c81c4baaecd846
URL: https://github.com/llvm/llvm-project/commit/f9f0919db7ea033a205c87eb08c81c4baaecd846
DIFF: https://github.com/llvm/llvm-project/commit/f9f0919db7ea033a205c87eb08c81c4baaecd846.diff
LOG: [clang-format] Improve support for multiline C# strings
Reviewers: krasimir
Reviewed By: krasimir
Tags: #clang-format
Differential Revision: https://reviews.llvm.org/D73622
Added:
Modified:
clang/lib/Format/ContinuationIndenter.cpp
clang/lib/Format/FormatTokenLexer.cpp
clang/lib/Format/FormatTokenLexer.h
clang/unittests/Format/FormatTestCSharp.cpp
Removed:
################################################################################
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index ec2de35ca0d2..1ea7eb031d36 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -1760,7 +1760,7 @@ ContinuationIndenter::createBreakableToken(const FormatToken &Current,
LineState &State, bool AllowBreak) {
unsigned StartColumn = State.Column - Current.ColumnWidth;
if (Current.isStringLiteral()) {
- // FIXME: String literal breaking is currently disabled for C#,Java and
+ // FIXME: String literal breaking is currently disabled for C#, Java and
// JavaScript, as it requires strings to be merged using "+" which we
// don't support.
if (Style.Language == FormatStyle::LK_Java ||
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index ba0bbf68f12f..98650951c7d0 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -57,6 +57,10 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() {
if (Style.Language == FormatStyle::LK_TextProto)
tryParsePythonComment();
tryMergePreviousTokens();
+ if (Style.isCSharp())
+ // This needs to come after tokens have been merged so that C#
+ // string literals are correctly identified.
+ handleCSharpVerbatimAndInterpolatedStrings();
if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
FirstInLineIndex = Tokens.size() - 1;
} while (Tokens.back()->Tok.isNot(tok::eof));
@@ -181,12 +185,12 @@ bool FormatTokenLexer::tryMergeJSPrivateIdentifier() {
// Search for verbatim or interpolated string literals @"ABC" or
// $"aaaaa{abc}aaaaa" and mark the token as TT_CSharpStringLiteral, to
// prevent splitting of @, $ and ".
+// Merging of multiline verbatim strings with embedded '"' is handled in
+// handleCSharpVerbatimAndInterpolatedStrings with lower-level lexing.
bool FormatTokenLexer::tryMergeCSharpStringLiteral() {
if (Tokens.size() < 2)
return false;
- auto &CSharpStringLiteral = *(Tokens.end() - 2);
-
// Interpolated strings could contain { } with " characters inside.
// $"{x ?? "null"}"
// should not be split into $"{x ?? ", null, "}" but should be treated as a
@@ -236,27 +240,12 @@ bool FormatTokenLexer::tryMergeCSharpStringLiteral() {
}
}
- // verbatim strings could contain "" which C# sees as an escaped ".
- // @"""Hello""" will have been tokenized as @"" "Hello" "" and needs
- // merging into a single string literal.
+ // Look for @"aaaaaa" or $"aaaaaa".
auto &String = *(Tokens.end() - 1);
if (!String->is(tok::string_literal))
return false;
- if (CSharpStringLiteral->Type == TT_CSharpStringLiteral &&
- (CSharpStringLiteral->TokenText.startswith(R"(@")") ||
- CSharpStringLiteral->TokenText.startswith(R"($@")"))) {
- CSharpStringLiteral->TokenText = StringRef(
- CSharpStringLiteral->TokenText.begin(),
- String->TokenText.end() - CSharpStringLiteral->TokenText.begin());
- CSharpStringLiteral->ColumnWidth += String->ColumnWidth;
- Tokens.erase(Tokens.end() - 1);
- return true;
- }
-
auto &At = *(Tokens.end() - 2);
-
- // Look for @"aaaaaa" or $"aaaaaa".
if (!(At->is(tok::at) || At->TokenText == "$"))
return false;
@@ -498,6 +487,68 @@ void FormatTokenLexer::tryParseJSRegexLiteral() {
resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
}
+// Re-lexes a C# verbatim string literal (@"..." or $@"...") by hand so that
+// the token covers the whole literal, including embedded newlines and ""
+// (an escaped double quote).
+//
+// Acts only when the last token in Tokens is a TT_CSharpStringLiteral whose
+// text starts with @" or $@". On success the token's text and column widths
+// are rewritten and the lexer is reset to just past the closing quote. If the
+// buffer ends before a closing quote is found, nothing is changed.
+void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() {
+  FormatToken *CSharpStringLiteral = Tokens.back();
+
+  if (CSharpStringLiteral->Type != TT_CSharpStringLiteral)
+    return;
+
+  // Only the verbatim forms, @"..." and $@"...", are handled here.
+  const bool IsVerbatim = CSharpStringLiteral->TokenText.startswith(R"(@")");
+  const bool IsInterpolatedVerbatim =
+      CSharpStringLiteral->TokenText.startswith(R"($@")");
+  if (!IsVerbatim && !IsInterpolatedVerbatim)
+    return;
+
+  // Step back from the lexer's current position by the token's length to
+  // find where the literal starts in the buffer, then skip the opening
+  // @" (2 characters) or $@" (3 characters).
+  const char *StrBegin =
+      Lex->getBufferLocation() - CSharpStringLiteral->TokenText.size();
+  const char *Offset = StrBegin + (IsVerbatim ? 2 : 3);
+  const char *BufferEnd = Lex->getBuffer().end();
+
+  // Look for a terminating '"' in the current file buffer.
+  // Make no effort to format code within an interpolated or verbatim string.
+  for (; Offset != BufferEnd; ++Offset) {
+    if (Offset[0] != '"')
+      continue;
+    // "" within a verbatim string is an escaped double quote: skip it.
+    if (Offset + 1 < BufferEnd && Offset[1] == '"')
+      ++Offset;
+    else
+      break;
+  }
+
+  // Make no attempt to format code properly if a verbatim string is
+  // unterminated.
+  if (Offset == BufferEnd)
+    return;
+
+  // Offset points at the closing quote; include it in the token text.
+  StringRef LiteralText(StrBegin, Offset - StrBegin + 1);
+  CSharpStringLiteral->TokenText = LiteralText;
+
+  // Adjust width for potentially multiline string literals: ColumnWidth
+  // measures only the first line of the literal.
+  size_t FirstBreak = LiteralText.find('\n');
+  StringRef FirstLineText = FirstBreak == StringRef::npos
+                                ? LiteralText
+                                : LiteralText.substr(0, FirstBreak);
+  CSharpStringLiteral->ColumnWidth = encoding::columnWidthWithTabs(
+      FirstLineText, CSharpStringLiteral->OriginalColumn, Style.TabWidth,
+      Encoding);
+  size_t LastBreak = LiteralText.rfind('\n');
+  if (LastBreak != StringRef::npos) {
+    CSharpStringLiteral->IsMultiline = true;
+    // The literal's last line is measured from column 0.
+    unsigned StartColumn = 0;
+    CSharpStringLiteral->LastLineColumnWidth = encoding::columnWidthWithTabs(
+        LiteralText.substr(LastBreak + 1), StartColumn, Style.TabWidth,
+        Encoding);
+  }
+
+  // The unterminated case returned above, so Offset is a valid position
+  // inside the buffer and lexing resumes right after the closing quote.
+  resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));
+}
+
void FormatTokenLexer::handleTemplateStrings() {
FormatToken *BacktickToken = Tokens.back();
diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h
index 053b759d2440..be13ac8f6735 100644
--- a/clang/lib/Format/FormatTokenLexer.h
+++ b/clang/lib/Format/FormatTokenLexer.h
@@ -79,6 +79,8 @@ class FormatTokenLexer {
// nested template parts by balancing curly braces.
void handleTemplateStrings();
+ void handleCSharpVerbatimAndInterpolatedStrings();
+
void tryParsePythonComment();
bool tryMerge_TMacro();
diff --git a/clang/unittests/Format/FormatTestCSharp.cpp b/clang/unittests/Format/FormatTestCSharp.cpp
index 222745f219f0..3d1b597174d8 100644
--- a/clang/unittests/Format/FormatTestCSharp.cpp
+++ b/clang/unittests/Format/FormatTestCSharp.cpp
@@ -412,9 +412,9 @@ TEST_F(FormatTestCSharp, CSharpSpaceAfterCStyleCast) {
TEST_F(FormatTestCSharp, CSharpEscapedQuotesInVerbatimStrings) {
FormatStyle Style = getGoogleStyle(FormatStyle::LK_CSharp);
- verifyFormat(R"(string str = @"""")", Style);
- verifyFormat(R"(string str = @"""Hello world""")", Style);
- verifyFormat(R"(string str = $@"""Hello {friend}""")", Style);
+ verifyFormat(R"(string str = @"""";)", Style);
+ verifyFormat(R"(string str = @"""Hello world""";)", Style);
+ verifyFormat(R"(string str = $@"""Hello {friend}""";)", Style);
}
TEST_F(FormatTestCSharp, CSharpQuotesInInterpolatedStrings) {
@@ -425,5 +425,37 @@ TEST_F(FormatTestCSharp, CSharpQuotesInInterpolatedStrings) {
verifyFormat(R"(string str3 = $"{braceCount}}} braces";)", Style);
}
+TEST_F(FormatTestCSharp, CSharpNewlinesInVerbatimStrings) {
+ // Use Microsoft style: Google style inserts a line break before multiline strings.
+
+ // verifyFormat does not understand multiline C# string literals,
+ // so compare the output of format() with the input explicitly.
+
+ FormatStyle Style = getMicrosoftStyle(FormatStyle::LK_CSharp);
+
+ std::string Code = R"(string s1 = $@"some code:
+ class {className} {{
+ {className}() {{}}
+ }}";)";
+
+ EXPECT_EQ(Code, format(Code, Style));
+
+ // Multiline string in the middle of a function call.
+ Code = R"(
+var x = foo(className, $@"some code:
+ class {className} {{
+ {className}() {{}}
+ }}",
+ y);)"; // y aligned with `className` arg.
+
+ EXPECT_EQ(Code, format(Code, Style));
+
+ // Interpolated string with embedded multiline string.
+ Code = R"(Console.WriteLine($"{string.Join(@",
+ ", values)}");)";
+
+ EXPECT_EQ(Code, format(Code, Style));
+}
+
} // namespace format
} // end namespace clang
More information about the cfe-commits
mailing list