r299927 - [clang-format] Handle NSString literals by merging tokens.
Alexander Kornienko via cfe-commits
cfe-commits at lists.llvm.org
Tue Apr 11 02:55:00 PDT 2017
Author: alexfh
Date: Tue Apr 11 04:55:00 2017
New Revision: 299927
URL: http://llvm.org/viewvc/llvm-project?rev=299927&view=rev
Log:
[clang-format] Handle NSString literals by merging tokens.
Summary:
This fixes a few outstanding bugs:
* incorrect breaking of NSString literals containing double-width characters;
* inconsistent formatting of ObjC dictionary literals containing NSString
literals;
* AlwaysBreakBeforeMultilineStrings ignoring implicitly-concatenated NSString
literals.
Reviewers: djasper
Reviewed By: djasper
Subscribers: klimek, cfe-commits
Differential Revision: https://reviews.llvm.org/D31706
Modified:
cfe/trunk/lib/Format/BreakableToken.cpp
cfe/trunk/lib/Format/ContinuationIndenter.cpp
cfe/trunk/lib/Format/FormatTokenLexer.cpp
cfe/trunk/lib/Format/FormatTokenLexer.h
cfe/trunk/lib/Format/TokenAnnotator.cpp
cfe/trunk/unittests/Format/FormatTest.cpp
Modified: cfe/trunk/lib/Format/BreakableToken.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.cpp?rev=299927&r1=299926&r2=299927&view=diff
==============================================================================
--- cfe/trunk/lib/Format/BreakableToken.cpp (original)
+++ cfe/trunk/lib/Format/BreakableToken.cpp Tue Apr 11 04:55:00 2017
@@ -186,7 +186,7 @@ BreakableSingleLineToken::BreakableSingl
const FormatStyle &Style)
: BreakableToken(Tok, InPPDirective, Encoding, Style),
StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) {
- assert(Tok.TokenText.endswith(Postfix));
+ assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
Line = Tok.TokenText.substr(
Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
}
@@ -210,16 +210,9 @@ BreakableStringLiteral::getSplit(unsigne
void BreakableStringLiteral::insertBreak(unsigned LineIndex,
unsigned TailOffset, Split Split,
WhitespaceManager &Whitespaces) {
- unsigned LeadingSpaces = StartColumn;
- // The '@' of an ObjC string literal (@"Test") does not become part of the
- // string token.
- // FIXME: It might be a cleaner solution to merge the tokens as a
- // precomputation step.
- if (Prefix.startswith("@"))
- --LeadingSpaces;
Whitespaces.replaceWhitespaceInToken(
Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
- Prefix, InPPDirective, 1, LeadingSpaces);
+ Prefix, InPPDirective, 1, StartColumn);
}
BreakableComment::BreakableComment(const FormatToken &Token,
Modified: cfe/trunk/lib/Format/ContinuationIndenter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/ContinuationIndenter.cpp?rev=299927&r1=299926&r2=299927&view=diff
==============================================================================
--- cfe/trunk/lib/Format/ContinuationIndenter.cpp (original)
+++ cfe/trunk/lib/Format/ContinuationIndenter.cpp Tue Apr 11 04:55:00 2017
@@ -679,11 +679,11 @@ unsigned ContinuationIndenter::getNewLin
if (Current.is(tok::identifier) && Current.Next &&
Current.Next->is(TT_DictLiteral))
return State.Stack.back().Indent;
- if (NextNonComment->isStringLiteral() && State.StartOfStringLiteral != 0)
- return State.StartOfStringLiteral;
if (NextNonComment->is(TT_ObjCStringLiteral) &&
State.StartOfStringLiteral != 0)
return State.StartOfStringLiteral - 1;
+ if (NextNonComment->isStringLiteral() && State.StartOfStringLiteral != 0)
+ return State.StartOfStringLiteral;
if (NextNonComment->is(tok::lessless) &&
State.Stack.back().FirstLessLess != 0)
return State.Stack.back().FirstLessLess;
@@ -864,10 +864,10 @@ unsigned ContinuationIndenter::moveState
moveStatePastScopeOpener(State, Newline);
moveStatePastFakeRParens(State);
- if (Current.isStringLiteral() && State.StartOfStringLiteral == 0)
- State.StartOfStringLiteral = State.Column;
if (Current.is(TT_ObjCStringLiteral) && State.StartOfStringLiteral == 0)
State.StartOfStringLiteral = State.Column + 1;
+ else if (Current.isStringLiteral() && State.StartOfStringLiteral == 0)
+ State.StartOfStringLiteral = State.Column;
else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) &&
!Current.isStringLiteral())
State.StartOfStringLiteral = 0;
@@ -1175,18 +1175,12 @@ unsigned ContinuationIndenter::breakProt
StringRef Text = Current.TokenText;
StringRef Prefix;
StringRef Postfix;
- bool IsNSStringLiteral = false;
// FIXME: Handle whitespace between '_T', '(', '"..."', and ')'.
// FIXME: Store Prefix and Suffix (or PrefixLength and SuffixLength to
// reduce the overhead) for each FormatToken, which is a string, so that we
// don't run multiple checks here on the hot path.
- if (Text.startswith("\"") && Current.Previous &&
- Current.Previous->is(tok::at)) {
- IsNSStringLiteral = true;
- Prefix = "@\"";
- }
if ((Text.endswith(Postfix = "\"") &&
- (IsNSStringLiteral || Text.startswith(Prefix = "\"") ||
+ (Text.startswith(Prefix = "@\"") || Text.startswith(Prefix = "\"") ||
Text.startswith(Prefix = "u\"") || Text.startswith(Prefix = "U\"") ||
Text.startswith(Prefix = "u8\"") ||
Text.startswith(Prefix = "L\""))) ||
Modified: cfe/trunk/lib/Format/FormatTokenLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/FormatTokenLexer.cpp?rev=299927&r1=299926&r2=299927&view=diff
==============================================================================
--- cfe/trunk/lib/Format/FormatTokenLexer.cpp (original)
+++ cfe/trunk/lib/Format/FormatTokenLexer.cpp Tue Apr 11 04:55:00 2017
@@ -64,6 +64,8 @@ void FormatTokenLexer::tryMergePreviousT
return;
if (tryMergeLessLess())
return;
+ if (tryMergeNSStringLiteral())
+ return;
if (Style.Language == FormatStyle::LK_JavaScript) {
static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
@@ -84,6 +86,22 @@ void FormatTokenLexer::tryMergePreviousT
}
}
+bool FormatTokenLexer::tryMergeNSStringLiteral() {
+ if (Tokens.size() < 2)
+ return false;
+ auto &At = *(Tokens.end() - 2);
+ auto &String = *(Tokens.end() - 1);
+ if (!At->is(tok::at) || !String->is(tok::string_literal))
+ return false;
+ At->Tok.setKind(tok::string_literal);
+ At->TokenText = StringRef(At->TokenText.begin(),
+ String->TokenText.end() - At->TokenText.begin());
+ At->ColumnWidth += String->ColumnWidth;
+ At->Type = TT_ObjCStringLiteral;
+ Tokens.erase(Tokens.end() - 1);
+ return true;
+}
+
bool FormatTokenLexer::tryMergeLessLess() {
// Merge X,less,less,Y into X,lessless,Y unless X or Y is less.
if (Tokens.size() < 3)
Modified: cfe/trunk/lib/Format/FormatTokenLexer.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/FormatTokenLexer.h?rev=299927&r1=299926&r2=299927&view=diff
==============================================================================
--- cfe/trunk/lib/Format/FormatTokenLexer.h (original)
+++ cfe/trunk/lib/Format/FormatTokenLexer.h Tue Apr 11 04:55:00 2017
@@ -47,6 +47,7 @@ private:
void tryMergePreviousTokens();
bool tryMergeLessLess();
+ bool tryMergeNSStringLiteral();
bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);
Modified: cfe/trunk/lib/Format/TokenAnnotator.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/TokenAnnotator.cpp?rev=299927&r1=299926&r2=299927&view=diff
==============================================================================
--- cfe/trunk/lib/Format/TokenAnnotator.cpp (original)
+++ cfe/trunk/lib/Format/TokenAnnotator.cpp Tue Apr 11 04:55:00 2017
@@ -907,7 +907,7 @@ private:
TT_FunctionLBrace, TT_ImplicitStringLiteral,
TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow,
TT_OverloadedOperator, TT_RegexLiteral,
- TT_TemplateString))
+ TT_TemplateString, TT_ObjCStringLiteral))
CurrentToken->Type = TT_Unknown;
CurrentToken->Role.reset();
CurrentToken->MatchingParen = nullptr;
@@ -1120,21 +1120,17 @@ private:
}
}
} else if (Current.is(tok::at) && Current.Next) {
- if (Current.Next->isStringLiteral()) {
- Current.Type = TT_ObjCStringLiteral;
- } else {
- switch (Current.Next->Tok.getObjCKeywordID()) {
- case tok::objc_interface:
- case tok::objc_implementation:
- case tok::objc_protocol:
- Current.Type = TT_ObjCDecl;
- break;
- case tok::objc_property:
- Current.Type = TT_ObjCProperty;
- break;
- default:
- break;
- }
+ switch (Current.Next->Tok.getObjCKeywordID()) {
+ case tok::objc_interface:
+ case tok::objc_implementation:
+ case tok::objc_protocol:
+ Current.Type = TT_ObjCDecl;
+ break;
+ case tok::objc_property:
+ Current.Type = TT_ObjCProperty;
+ break;
+ default:
+ break;
}
} else if (Current.is(tok::period)) {
FormatToken *PreviousNoComment = Current.getPreviousNonComment();
@@ -2457,8 +2453,7 @@ bool TokenAnnotator::mustBreakBefore(con
} else if (Style.Language == FormatStyle::LK_Cpp ||
Style.Language == FormatStyle::LK_ObjC ||
Style.Language == FormatStyle::LK_Proto) {
- if (Left.isStringLiteral() &&
- (Right.isStringLiteral() || Right.is(TT_ObjCStringLiteral)))
+ if (Left.isStringLiteral() && Right.isStringLiteral())
return true;
}
Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=299927&r1=299926&r2=299927&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Tue Apr 11 04:55:00 2017
@@ -4090,9 +4090,9 @@ TEST_F(FormatTest, AlwaysBreakBeforeMult
"c\";",
Break));
- // Exempt ObjC strings for now.
- EXPECT_EQ("NSString *const kString = @\"aaaa\"\n"
- " @\"bbbb\";",
+ EXPECT_EQ("NSString *const kString =\n"
+ " @\"aaaa\"\n"
+ " @\"bbbb\";",
format("NSString *const kString = @\"aaaa\"\n"
"@\"bbbb\";",
Break));
@@ -6460,6 +6460,7 @@ TEST_F(FormatTest, BreaksWideAndNSString
EXPECT_EQ("@\"NSString \"\n"
"@\"literal\";",
format("@\"NSString literal\";", getGoogleStyleWithColumns(19)));
+ verifyFormat(R"(NSString *s = @"é£é£é£é£";)", getLLVMStyleWithColumns(26));
// This input makes clang-format try to split the incomplete unicode escape
// sequence, which used to lead to a crasher.
@@ -8301,7 +8302,14 @@ TEST_F(FormatTest, AllmanBraceBreaking)
// .. or dict literals.
verifyFormat("void f()\n"
"{\n"
- " [object someMethod:@{ @\"a\" : @\"b\" }];\n"
+ " // ...\n"
+ " [object someMethod:@{@\"a\" : @\"b\"}];\n"
+ "}",
+ AllmanBraceStyle);
+ verifyFormat("void f()\n"
+ "{\n"
+ " // ...\n"
+ " [object someMethod:@{a : @\"b\"}];\n"
"}",
AllmanBraceStyle);
verifyFormat("int f()\n"
More information about the cfe-commits
mailing list