[clang] 0ffb3dd - [clang-format] Fix a hang when formatting C# $@ string literals
via cfe-commits
cfe-commits at lists.llvm.org
Mon Jul 25 23:18:01 PDT 2022
Author: owenca
Date: 2022-07-25T23:17:54-07:00
New Revision: 0ffb3dd33ee1a50a6ab5db80bb8caee9133e66dc
URL: https://github.com/llvm/llvm-project/commit/0ffb3dd33ee1a50a6ab5db80bb8caee9133e66dc
DIFF: https://github.com/llvm/llvm-project/commit/0ffb3dd33ee1a50a6ab5db80bb8caee9133e66dc.diff
LOG: [clang-format] Fix a hang when formatting C# $@ string literals
Fixes #56624.
Differential Revision: https://reviews.llvm.org/D130411
Added:
Modified:
clang/lib/Format/FormatTokenLexer.cpp
clang/unittests/Format/FormatTestCSharp.cpp
Removed:
################################################################################
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 66f03dcb53a12..3f9b68ccbb39f 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -239,55 +239,6 @@ bool FormatTokenLexer::tryMergeCSharpStringLiteral() {
if (Tokens.size() < 2)
return false;
- // Interpolated strings could contain { } with " characters inside.
- // $"{x ?? "null"}"
- // should not be split into $"{x ?? ", null, "}" but should treated as a
- // single string-literal.
- //
- // We opt not to try and format expressions inside {} within a C#
- // interpolated string. Formatting expressions within an interpolated string
- // would require similar work as that done for JavaScript template strings
- // in `handleTemplateStrings()`.
- auto &CSharpInterpolatedString = *(Tokens.end() - 2);
- if (CSharpInterpolatedString->getType() == TT_CSharpStringLiteral &&
- (CSharpInterpolatedString->TokenText.startswith(R"($")") ||
- CSharpInterpolatedString->TokenText.startswith(R"($@")"))) {
- int UnmatchedOpeningBraceCount = 0;
-
- auto TokenTextSize = CSharpInterpolatedString->TokenText.size();
- for (size_t Index = 0; Index < TokenTextSize; ++Index) {
- char C = CSharpInterpolatedString->TokenText[Index];
- if (C == '{') {
- // "{{" inside an interpolated string is an escaped '{' so skip it.
- if (Index + 1 < TokenTextSize &&
- CSharpInterpolatedString->TokenText[Index + 1] == '{') {
- ++Index;
- continue;
- }
- ++UnmatchedOpeningBraceCount;
- } else if (C == '}') {
- // "}}" inside an interpolated string is an escaped '}' so skip it.
- if (Index + 1 < TokenTextSize &&
- CSharpInterpolatedString->TokenText[Index + 1] == '}') {
- ++Index;
- continue;
- }
- --UnmatchedOpeningBraceCount;
- }
- }
-
- if (UnmatchedOpeningBraceCount > 0) {
- auto &NextToken = *(Tokens.end() - 1);
- CSharpInterpolatedString->TokenText =
- StringRef(CSharpInterpolatedString->TokenText.begin(),
- NextToken->TokenText.end() -
- CSharpInterpolatedString->TokenText.begin());
- CSharpInterpolatedString->ColumnWidth += NextToken->ColumnWidth;
- Tokens.erase(Tokens.end() - 1);
- return true;
- }
- }
-
// Look for @"aaaaaa" or $"aaaaaa".
auto &String = *(Tokens.end() - 1);
if (!String->is(tok::string_literal))
@@ -571,45 +522,105 @@ void FormatTokenLexer::tryParseJSRegexLiteral() {
resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));
}
+static auto lexCSharpString(const char *Begin, const char *End, bool Verbatim,
+ bool Interpolated) {
+ auto Repeated = [&Begin, End]() {
+ return Begin + 1 < End && Begin[1] == Begin[0];
+ };
+
+ // Look for a terminating '"' in the current file buffer.
+ // Make no effort to format code within an interpolated or verbatim string.
+ //
+ // Interpolated strings could contain { } with " characters inside.
+ // $"{x ?? "null"}"
+ // should not be split into $"{x ?? ", null, "}" but should be treated as a
+ // single string-literal.
+ //
+ // We opt not to try and format expressions inside {} within a C#
+ // interpolated string. Formatting expressions within an interpolated string
+ // would require similar work as that done for JavaScript template strings
+ // in `handleTemplateStrings()`.
+ for (int UnmatchedOpeningBraceCount = 0; Begin < End; ++Begin) {
+ switch (*Begin) {
+ case '\\':
+ if (!Verbatim)
+ ++Begin;
+ break;
+ case '{':
+ if (Interpolated) {
+ // {{ inside an interpolated string is escaped, so skip it.
+ if (Repeated())
+ ++Begin;
+ else
+ ++UnmatchedOpeningBraceCount;
+ }
+ break;
+ case '}':
+ if (Interpolated) {
+ // }} inside an interpolated string is escaped, so skip it.
+ if (Repeated())
+ ++Begin;
+ else if (UnmatchedOpeningBraceCount > 0)
+ --UnmatchedOpeningBraceCount;
+ else
+ return End;
+ }
+ break;
+ case '"':
+ if (UnmatchedOpeningBraceCount > 0)
+ break;
+ // "" within a verbatim string is an escaped double quote: skip it.
+ if (Verbatim && Repeated()) {
+ ++Begin;
+ break;
+ }
+ return Begin;
+ }
+ }
+
+ return End;
+}
+
void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() {
FormatToken *CSharpStringLiteral = Tokens.back();
- if (CSharpStringLiteral->getType() != TT_CSharpStringLiteral)
+ if (CSharpStringLiteral->isNot(TT_CSharpStringLiteral))
return;
+ auto &TokenText = CSharpStringLiteral->TokenText;
+
+ bool Verbatim = false;
+ bool Interpolated = false;
+ if (TokenText.startswith(R"($@")")) {
+ Verbatim = true;
+ Interpolated = true;
+ } else if (TokenText.startswith(R"(@")")) {
+ Verbatim = true;
+ } else if (TokenText.startswith(R"($")")) {
+ Interpolated = true;
+ }
+
// Deal with multiline strings.
- if (!(CSharpStringLiteral->TokenText.startswith(R"(@")") ||
- CSharpStringLiteral->TokenText.startswith(R"($@")"))) {
+ if (!Verbatim && !Interpolated)
return;
- }
- const char *StrBegin =
- Lex->getBufferLocation() - CSharpStringLiteral->TokenText.size();
+ const char *StrBegin = Lex->getBufferLocation() - TokenText.size();
const char *Offset = StrBegin;
- if (CSharpStringLiteral->TokenText.startswith(R"(@")"))
- Offset += 2;
- else // CSharpStringLiteral->TokenText.startswith(R"($@")")
+ if (Verbatim && Interpolated)
Offset += 3;
+ else
+ Offset += 2;
- // Look for a terminating '"' in the current file buffer.
- // Make no effort to format code within an interpolated or verbatim string.
- for (; Offset != Lex->getBuffer().end(); ++Offset) {
- if (Offset[0] == '"') {
- // "" within a verbatim string is an escaped double quote: skip it.
- if (Offset + 1 < Lex->getBuffer().end() && Offset[1] == '"')
- ++Offset;
- else
- break;
- }
- }
+ const auto End = Lex->getBuffer().end();
+ Offset = lexCSharpString(Offset, End, Verbatim, Interpolated);
// Make no attempt to format code properly if a verbatim string is
// unterminated.
- if (Offset == Lex->getBuffer().end())
+ if (Offset >= End)
return;
StringRef LiteralText(StrBegin, Offset - StrBegin + 1);
- CSharpStringLiteral->TokenText = LiteralText;
+ TokenText = LiteralText;
// Adjust width for potentially multiline string literals.
size_t FirstBreak = LiteralText.find('\n');
@@ -628,10 +639,8 @@ void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() {
StartColumn, Style.TabWidth, Encoding);
}
- SourceLocation loc = Offset < Lex->getBuffer().end()
- ? Lex->getSourceLocation(Offset + 1)
- : SourceMgr.getLocForEndOfFile(ID);
- resetLexer(SourceMgr.getFileOffset(loc));
+ assert(Offset < End);
+ resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));
}
void FormatTokenLexer::handleTemplateStrings() {
diff --git a/clang/unittests/Format/FormatTestCSharp.cpp b/clang/unittests/Format/FormatTestCSharp.cpp
index aa0304f73fed4..680a9576cf5a3 100644
--- a/clang/unittests/Format/FormatTestCSharp.cpp
+++ b/clang/unittests/Format/FormatTestCSharp.cpp
@@ -574,6 +574,7 @@ TEST_F(FormatTestCSharp, CSharpEscapedQuotesInVerbatimStrings) {
verifyFormat(R"(string str = @"""";)", Style);
verifyFormat(R"(string str = @"""Hello world""";)", Style);
verifyFormat(R"(string str = $@"""Hello {friend}""";)", Style);
+ verifyFormat(R"(return $@"Foo ""/foo?f={Request.Query["f"]}""";)", Style);
}
TEST_F(FormatTestCSharp, CSharpQuotesInInterpolatedStrings) {
More information about the cfe-commits
mailing list