r186454 - clang-format: Improve handling of unterminated string literals.
Daniel Jasper
djasper at google.com
Tue Jul 16 13:28:34 PDT 2013
Author: djasper
Date: Tue Jul 16 15:28:33 2013
New Revision: 186454
URL: http://llvm.org/viewvc/llvm-project?rev=186454&view=rev
Log:
clang-format: Improve handling of unterminated string literals.
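Before, clang-format would silently drop these, because the raw lexer reports
them as unknown tokens and unknown tokens were consumed as whitespace. With
this patch, they are treated as string literals and are mostly left alone.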
Modified:
cfe/trunk/lib/Format/Format.cpp
cfe/trunk/lib/Format/FormatToken.h
cfe/trunk/lib/Format/TokenAnnotator.cpp
cfe/trunk/unittests/Format/FormatTest.cpp
Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=186454&r1=186453&r2=186454&view=diff
==============================================================================
--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Tue Jul 16 15:28:33 2013
@@ -905,6 +905,10 @@ private:
// Only break up default narrow strings.
if (!Current.TokenText.startswith("\""))
return 0;
+ // Exempts unterminated string literals from line breaking. The user will
+ // likely want to terminate the string before any line breaking is done.
+ if (Current.IsUnterminatedLiteral)
+ return 0;
Token.reset(new BreakableStringLiteral(Current, StartColumn,
Line.InPPDirective, Encoding));
@@ -1259,8 +1263,7 @@ private:
}
FormatTok = new (Allocator.Allocate()) FormatToken;
- Lex.LexFromRawLexer(FormatTok->Tok);
- StringRef Text = rawTokenText(FormatTok->Tok);
+ readRawToken(*FormatTok);
SourceLocation WhitespaceStart =
FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
if (SourceMgr.getFileOffset(WhitespaceStart) == 0)
@@ -1269,16 +1272,16 @@ private:
// Consume and record whitespace until we find a significant token.
unsigned WhitespaceLength = TrailingWhitespace;
while (FormatTok->Tok.is(tok::unknown)) {
- unsigned Newlines = Text.count('\n');
+ unsigned Newlines = FormatTok->TokenText.count('\n');
if (Newlines > 0)
- FormatTok->LastNewlineOffset = WhitespaceLength + Text.rfind('\n') + 1;
+ FormatTok->LastNewlineOffset =
+ WhitespaceLength + FormatTok->TokenText.rfind('\n') + 1;
FormatTok->NewlinesBefore += Newlines;
- unsigned EscapedNewlines = Text.count("\\\n");
+ unsigned EscapedNewlines = FormatTok->TokenText.count("\\\n");
FormatTok->HasUnescapedNewline |= EscapedNewlines != Newlines;
WhitespaceLength += FormatTok->Tok.getLength();
- Lex.LexFromRawLexer(FormatTok->Tok);
- Text = rawTokenText(FormatTok->Tok);
+ readRawToken(*FormatTok);
}
// In case the token starts with escaped newlines, we want to
@@ -1287,30 +1290,31 @@ private:
// FIXME: What do we want to do with other escaped spaces, and escaped
// spaces or newlines in the middle of tokens?
// FIXME: Add a more explicit test.
- while (Text.size() > 1 && Text[0] == '\\' && Text[1] == '\n') {
+ while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
+ FormatTok->TokenText[1] == '\n') {
// FIXME: ++FormatTok->NewlinesBefore is missing...
WhitespaceLength += 2;
- Text = Text.substr(2);
+ FormatTok->TokenText = FormatTok->TokenText.substr(2);
}
TrailingWhitespace = 0;
if (FormatTok->Tok.is(tok::comment)) {
- StringRef UntrimmedText = Text;
- Text = Text.rtrim();
- TrailingWhitespace = UntrimmedText.size() - Text.size();
+ StringRef UntrimmedText = FormatTok->TokenText;
+ FormatTok->TokenText = FormatTok->TokenText.rtrim();
+ TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
} else if (FormatTok->Tok.is(tok::raw_identifier)) {
- IdentifierInfo &Info = IdentTable.get(Text);
+ IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
FormatTok->Tok.setIdentifierInfo(&Info);
FormatTok->Tok.setKind(Info.getTokenID());
} else if (FormatTok->Tok.is(tok::greatergreater)) {
FormatTok->Tok.setKind(tok::greater);
- Text = Text.substr(0, 1);
+ FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
GreaterStashed = true;
}
// Now FormatTok is the next non-whitespace token.
- FormatTok->TokenText = Text;
- FormatTok->CodePointCount = encoding::getCodePointCount(Text, Encoding);
+ FormatTok->CodePointCount =
+ encoding::getCodePointCount(FormatTok->TokenText, Encoding);
FormatTok->WhitespaceRange = SourceRange(
WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
@@ -1327,10 +1331,18 @@ private:
llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
SmallVector<FormatToken *, 16> Tokens;
- /// Returns the text of \c FormatTok.
- StringRef rawTokenText(Token &Tok) {
- return StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
- Tok.getLength());
+ void readRawToken(FormatToken &Tok) {
+ Lex.LexFromRawLexer(Tok.Tok);
+ Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
+ Tok.Tok.getLength());
+
+ // For formatting, treat unterminated string literals like normal string
+ // literals.
+ if (Tok.is(tok::unknown) && !Tok.TokenText.empty() &&
+ Tok.TokenText[0] == '"') {
+ Tok.Tok.setKind(tok::string_literal);
+ Tok.IsUnterminatedLiteral = true;
+ }
}
};
Modified: cfe/trunk/lib/Format/FormatToken.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/FormatToken.h?rev=186454&r1=186453&r2=186454&view=diff
==============================================================================
--- cfe/trunk/lib/Format/FormatToken.h (original)
+++ cfe/trunk/lib/Format/FormatToken.h Tue Jul 16 15:28:33 2013
@@ -77,11 +77,12 @@ struct FormatToken {
FormatToken()
: NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
CodePointCount(0), IsFirst(false), MustBreakBefore(false),
- BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0),
- CanBreakBefore(false), ClosesTemplateDeclaration(false),
- ParameterCount(0), PackingKind(PPK_Inconclusive), TotalLength(0),
- UnbreakableTailLength(0), BindingStrength(0), SplitPenalty(0),
- LongestObjCSelectorName(0), FakeRParens(0), LastInChainOfCalls(false),
+ IsUnterminatedLiteral(false), BlockKind(BK_Unknown), Type(TT_Unknown),
+ SpacesRequiredBefore(0), CanBreakBefore(false),
+ ClosesTemplateDeclaration(false), ParameterCount(0),
+ PackingKind(PPK_Inconclusive), TotalLength(0), UnbreakableTailLength(0),
+ BindingStrength(0), SplitPenalty(0), LongestObjCSelectorName(0),
+ FakeRParens(0), LastInChainOfCalls(false),
PartOfMultiVariableDeclStmt(false), MatchingParen(NULL), Previous(NULL),
Next(NULL) {}
@@ -133,6 +134,9 @@ struct FormatToken {
/// escaped newlines.
StringRef TokenText;
+ /// \brief Set to \c true if this token is an unterminated literal.
+ bool IsUnterminatedLiteral;
+
/// \brief Contains the kind of block if this token is a brace.
BraceBlockKind BlockKind;
Modified: cfe/trunk/lib/Format/TokenAnnotator.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/TokenAnnotator.cpp?rev=186454&r1=186453&r2=186454&view=diff
==============================================================================
--- cfe/trunk/lib/Format/TokenAnnotator.cpp (original)
+++ cfe/trunk/lib/Format/TokenAnnotator.cpp Tue Jul 16 15:28:33 2013
@@ -948,13 +948,14 @@ void TokenAnnotator::calculateFormatting
Current->SpacesRequiredBefore =
spaceRequiredBefore(Line, *Current) ? 1 : 0;
- if (Current->MustBreakBefore) {
- } else if (Current->is(tok::comment)) {
+ if (Current->is(tok::comment)) {
Current->MustBreakBefore = Current->NewlinesBefore > 0;
} else if (Current->Previous->isTrailingComment() ||
(Current->is(tok::string_literal) &&
Current->Previous->is(tok::string_literal))) {
Current->MustBreakBefore = true;
+ } else if (Current->Previous->IsUnterminatedLiteral) {
+ Current->MustBreakBefore = true;
} else if (Current->is(tok::lessless) && Current->Next &&
Current->Previous->is(tok::string_literal) &&
Current->Next->is(tok::string_literal)) {
@@ -969,8 +970,6 @@ void TokenAnnotator::calculateFormatting
Current->getNextNonComment() &&
Current->getNextNonComment()->is(tok::string_literal)) {
Current->MustBreakBefore = true;
- } else {
- Current->MustBreakBefore = false;
}
Current->CanBreakBefore =
Current->MustBreakBefore || canBreakBefore(Line, *Current);
Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=186454&r1=186453&r2=186454&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Tue Jul 16 15:28:33 2013
@@ -5004,6 +5004,11 @@ TEST_F(FormatTest, SkipsUnknownStringLit
format("L\"unsupported literal\";", getGoogleStyleWithColumns(15)));
EXPECT_EQ("R\"x(raw literal)x\";",
format("R\"x(raw literal)x\";", getGoogleStyleWithColumns(15)));
+ verifyFormat("string a = \"unterminated;");
+ EXPECT_EQ("function(\"unterminated,\n"
+ " OtherParameter);",
+ format("function( \"unterminated,\n"
+ " OtherParameter);"));
}
TEST_F(FormatTest, DoesNotTryToParseUDLiteralsInPreCpp11Code) {
More information about the cfe-commits
mailing list