[PATCH] Store first and last newline position in the token text for string literals and comments.
Alexander Kornienko
alexfh at google.com
Fri Aug 30 04:04:58 PDT 2013
Hi djasper,
Store the positions of the first and last newline in the token text for string
literals and comments, to avoid calling .find('\n') for each possible solution.
http://llvm-reviews.chandlerc.com/D1556
Files:
lib/Format/ContinuationIndenter.cpp
lib/Format/Format.cpp
lib/Format/FormatToken.h
lib/Format/TokenAnnotator.cpp
Index: lib/Format/ContinuationIndenter.cpp
===================================================================
--- lib/Format/ContinuationIndenter.cpp
+++ lib/Format/ContinuationIndenter.cpp
@@ -583,23 +583,19 @@
unsigned
ContinuationIndenter::addMultilineStringLiteral(const FormatToken &Current,
LineState &State) {
- StringRef Text = Current.TokenText;
+ unsigned StartColumn = State.Column - Current.CodePointCount;
// We can only affect layout of the first and the last line, so the penalty
// for all other lines is constant, and we ignore it.
- size_t FirstLineBreak = Text.find('\n');
- size_t LastLineBreak = Text.find_last_of('\n');
- assert(FirstLineBreak != StringRef::npos);
- unsigned StartColumn = State.Column - Current.CodePointCount;
- State.Column =
- encoding::getCodePointCount(Text.substr(LastLineBreak + 1), Encoding);
+ StringRef LastLine = Current.TokenText.substr(Current.LastNewlineInText + 1);
+ State.Column = encoding::getCodePointCount(LastLine, Encoding);
// Break before further function parameters on all levels.
for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
State.Stack[i].BreakBeforeParameter = true;
+ StringRef FirstLine = Current.TokenText.substr(0, Current.FirstNewlineInText);
unsigned ColumnsUsed =
- StartColumn +
- encoding::getCodePointCount(Text.substr(0, FirstLineBreak), Encoding);
+ StartColumn + encoding::getCodePointCount(FirstLine, Encoding);
if (ColumnsUsed > getColumnLimit())
return Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit());
return 0;
@@ -619,7 +615,7 @@
// Don't break string literals with (in case of non-raw strings, escaped)
// newlines. As clang-format must not change the string's content, it is
// unlikely that we'll end up with a better format.
- if (Current.IsMultiline)
+ if (Current.isMultiline())
return addMultilineStringLiteral(Current, State);
// Only break up default narrow strings.
@@ -649,7 +645,7 @@
// FIXME: If we want to handle them correctly, we'll need to adjust
// leading whitespace in consecutive lines when changing indentation of
// the first line similar to what we do with block comments.
- if (Current.IsMultiline) {
+ if (Current.isMultiline()) {
StringRef::size_type EscapedNewlinePos = Current.TokenText.find("\\\n");
assert(EscapedNewlinePos != StringRef::npos);
State.Column =
@@ -740,7 +736,7 @@
// AlwaysBreakBeforeMultilineStrings implementation.
if (Current.TokenText.startswith("R\""))
return false;
- if (Current.IsMultiline)
+ if (Current.isMultiline())
return true;
if (Current.getNextNonComment() &&
Current.getNextNonComment()->is(tok::string_literal))
Index: lib/Format/Format.cpp
===================================================================
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -596,10 +596,10 @@
FormatTok->CodePointCount =
encoding::getCodePointCount(FormatTok->TokenText, Encoding);
- if (FormatTok->isOneOf(tok::string_literal, tok::comment) &&
- FormatTok->TokenText.find('\n') != StringRef::npos)
- FormatTok->IsMultiline = true;
-
+ if (FormatTok->isOneOf(tok::string_literal, tok::comment)) {
+ FormatTok->FirstNewlineInText = FormatTok->TokenText.find('\n');
+ FormatTok->LastNewlineInText = FormatTok->TokenText.find_last_of('\n');
+ }
// FIXME: Add the CodePointCount to Column.
FormatTok->WhitespaceRange = SourceRange(
WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
Index: lib/Format/FormatToken.h
===================================================================
--- lib/Format/FormatToken.h
+++ lib/Format/FormatToken.h
@@ -80,7 +80,8 @@
/// whitespace characters preceeding it.
struct FormatToken {
FormatToken()
- : NewlinesBefore(0), HasUnescapedNewline(false), IsMultiline(false),
+ : NewlinesBefore(0), HasUnescapedNewline(false),
+ FirstNewlineInText(StringRef::npos), LastNewlineInText(StringRef::npos),
LastNewlineOffset(0), CodePointCount(0), IsFirst(false),
MustBreakBefore(false), IsUnterminatedLiteral(false),
BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0),
@@ -104,8 +105,18 @@
/// Token.
bool HasUnescapedNewline;
+ /// \brief Contains a position of the first newline character (escaped or not)
+ /// in the token text or StringRef::npos when the text doesn't contain
+ /// newlines. Is filled for string literals and comments.
+ size_t FirstNewlineInText;
+
+ /// \brief Contains a position of the last newline character (escaped or not)
+ /// in the token text or StringRef::npos when the text doesn't contain
+ /// newlines. Is filled for string literals and comments.
+ size_t LastNewlineInText;
+
/// \brief Whether the token text contains newlines (escaped or not).
- bool IsMultiline;
+ bool isMultiline() const { return FirstNewlineInText != StringRef::npos; }
/// \brief The range of the whitespace immediately preceeding the \c Token.
SourceRange WhitespaceRange;
Index: lib/Format/TokenAnnotator.cpp
===================================================================
--- lib/Format/TokenAnnotator.cpp
+++ lib/Format/TokenAnnotator.cpp
@@ -1024,7 +1024,7 @@
Current->CanBreakBefore =
Current->MustBreakBefore || canBreakBefore(Line, *Current);
if (Current->MustBreakBefore ||
- (Current->is(tok::string_literal) && Current->IsMultiline))
+ (Current->is(tok::string_literal) && Current->isMultiline()))
Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit;
else
Current->TotalLength = Current->Previous->TotalLength +
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D1556.1.patch
Type: text/x-patch
Size: 5809 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130830/2baee1c7/attachment.bin>
More information about the cfe-commits
mailing list