[PATCH] Better support for multiline string literals (including C++11 raw string literals).
Alexander Kornienko
alexfh at google.com
Thu Aug 29 06:44:18 PDT 2013
Hi djasper,
Count the characters in the first and the last line of a multiline string
literal correctly, so that we only break before the literal when needed.
http://llvm-reviews.chandlerc.com/D1544
Files:
lib/Format/ContinuationIndenter.cpp
lib/Format/ContinuationIndenter.h
unittests/Format/FormatTest.cpp
Index: lib/Format/ContinuationIndenter.cpp
===================================================================
--- lib/Format/ContinuationIndenter.cpp
+++ lib/Format/ContinuationIndenter.cpp
@@ -579,35 +579,60 @@
return Penalty;
}
+unsigned
+ContinuationIndenter::addMultilineStringLiteral(const FormatToken &Current,
+ LineState &State) {
+ StringRef Text = Current.TokenText;
+ size_t FirstLineBreak = Text.find('\n');
+ size_t LastLineBreak = Text.find_last_of('\n');
+ assert(FirstLineBreak != StringRef::npos);
+ unsigned StartColumn = State.Column - Current.CodePointCount;
+ State.Column =
+ encoding::getCodePointCount(Text.substr(LastLineBreak + 1), Encoding);
+
+ // Break before further function parameters on all levels.
+ for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
+ State.Stack[i].BreakBeforeParameter = true;
+
+ unsigned ColumnsUsed =
+ StartColumn +
+ encoding::getCodePointCount(Text.substr(0, FirstLineBreak), Encoding);
+ if (ColumnsUsed > getColumnLimit())
+ return Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit());
+ return 0;
+}
+
unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
LineState &State,
bool DryRun) {
if (!Current.isOneOf(tok::string_literal, tok::comment))
return 0;
llvm::OwningPtr<BreakableToken> Token;
unsigned StartColumn = State.Column - Current.CodePointCount;
- unsigned OriginalStartColumn =
- SourceMgr.getSpellingColumnNumber(Current.getStartOfNonWhitespace()) - 1;
if (Current.is(tok::string_literal) &&
Current.Type != TT_ImplicitStringLiteral) {
+ // Don't break string literals with (in case of non-raw strings, escaped)
+ // newlines. As clang-format must not change the string's content, it is
+ // unlikely that we'll end up with a better format.
+ if (Current.TokenText.find('\n') != StringRef::npos)
+ return addMultilineStringLiteral(Current, State);
+
// Only break up default narrow strings.
if (!Current.TokenText.startswith("\""))
return 0;
- // Don't break string literals with escaped newlines. As clang-format must
- // not change the string's content, it is unlikely that we'll end up with
- // a better format.
- if (Current.TokenText.find("\\\n") != StringRef::npos)
- return 0;
// Exempts unterminated string literals from line breaking. The user will
// likely want to terminate the string before any line breaking is done.
if (Current.IsUnterminatedLiteral)
return 0;
Token.reset(new BreakableStringLiteral(Current, StartColumn,
Line.InPPDirective, Encoding));
} else if (Current.Type == TT_BlockComment && Current.isTrailingComment()) {
+ unsigned OriginalStartColumn =
+ SourceMgr.getSpellingColumnNumber(Current.getStartOfNonWhitespace()) -
+ 1;
Token.reset(new BreakableBlockComment(
Style, Current, StartColumn, OriginalStartColumn, !Current.Previous,
Line.InPPDirective, Encoding));
@@ -707,6 +732,10 @@
const FormatToken &Current = *State.NextToken;
if (!Current.is(tok::string_literal))
return false;
+ // We never consider raw string literals "multiline" for the purpose of
+ // AlwaysBreakBeforeMultilineStrings implementation.
+ if (Current.TokenText.startswith("R\""))
+ return false;
if (Current.getNextNonComment() &&
Current.getNextNonComment()->is(tok::string_literal))
return true; // Implicit concatenation.
Index: lib/Format/ContinuationIndenter.h
===================================================================
--- lib/Format/ContinuationIndenter.h
+++ lib/Format/ContinuationIndenter.h
@@ -84,6 +84,9 @@
unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
bool DryRun);
+ unsigned addMultilineStringLiteral(const FormatToken &Current,
+ LineState &State);
+
/// \brief Returns \c true if the next token starts a multiline string
/// literal.
///
Index: unittests/Format/FormatTest.cpp
===================================================================
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -5340,13 +5340,53 @@
}
TEST_F(FormatTest, DontSplitStringLiteralsWithEscapedNewlines) {
- EXPECT_EQ("aaaaaaaaaaa =\n"
- " \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\\n"
- " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\\n"
- " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\";",
- format("aaaaaaaaaaa = \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\\n"
- " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\\n"
- " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\";"));
+ EXPECT_EQ(
+ "aaaaaaaaaaa = \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\\n"
+ " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\\n"
+ " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\";",
+ format("aaaaaaaaaaa = \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\\n"
+ " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\\\n"
+ " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\";"));
+}
+
+TEST_F(FormatTest, CountsCharactersInMultilineRawStringLiterals) {
+ EXPECT_EQ("f(g(R\"x(raw literal)x\", a), b);",
+ format("f(g(R\"x(raw literal)x\", a), b);", getGoogleStyle()));
+ EXPECT_EQ("fffffffffff(g(R\"x(\n"
+ "multiline raw string literal xxxxxxxxxxxxxx\n"
+ ")x\",\n"
+ " a),\n"
+ " b);",
+ format("fffffffffff(g(R\"x(\n"
+ "multiline raw string literal xxxxxxxxxxxxxx\n"
+ ")x\", a), b);",
+ getGoogleStyleWithColumns(20)));
+ EXPECT_EQ("fffffffffff(\n"
+ " g(R\"x(qqq\n"
+ "multiline raw string literal xxxxxxxxxxxxxx\n"
+ ")x\",\n"
+ " a),\n"
+ " b);",
+ format("fffffffffff(g(R\"x(qqq\n"
+ "multiline raw string literal xxxxxxxxxxxxxx\n"
+ ")x\", a), b);",
+ getGoogleStyleWithColumns(20)));
+
+ EXPECT_EQ("fffffffffff(R\"x(\n"
+ "multiline raw string literal xxxxxxxxxxxxxx\n"
+ ")x\");",
+ format("fffffffffff(R\"x(\n"
+ "multiline raw string literal xxxxxxxxxxxxxx\n"
+ ")x\");",
+ getGoogleStyleWithColumns(20)));
+ EXPECT_EQ("fffffffffff(R\"x(\n"
+ "multiline raw string literal xxxxxxxxxxxxxx\n"
+ ")x\" +\n"
+ " bbbbbb);",
+ format("fffffffffff(R\"x(\n"
+ "multiline raw string literal xxxxxxxxxxxxxx\n"
+ ")x\" + bbbbbb);",
+ getGoogleStyleWithColumns(20)));
}
TEST_F(FormatTest, SkipsUnknownStringLiterals) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D1544.1.patch
Type: text/x-patch
Size: 7064 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130829/9fa28ee9/attachment.bin>
More information about the cfe-commits mailing list