r189758 - Store first and last newline position in the token text for string literals and comments.
Alexander Kornienko
alexfh at google.com
Mon Sep 2 06:58:14 PDT 2013
Author: alexfh
Date: Mon Sep 2 08:58:14 2013
New Revision: 189758
URL: http://llvm.org/viewvc/llvm-project?rev=189758&view=rev
Log:
Store first and last newline position in the token text for string literals and comments.
Summary:
Compute the first and last newline positions in the token text of string
literals and comments once, and store the resulting code point counts of the
first and last lines, to avoid doing .find('\n') for each possible solution.
Reviewers: djasper
Reviewed By: djasper
CC: cfe-commits, klimek
Differential Revision: http://llvm-reviews.chandlerc.com/D1556
Modified:
cfe/trunk/lib/Format/ContinuationIndenter.cpp
cfe/trunk/lib/Format/Format.cpp
cfe/trunk/lib/Format/FormatToken.h
cfe/trunk/lib/Format/TokenAnnotator.cpp
Modified: cfe/trunk/lib/Format/ContinuationIndenter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/ContinuationIndenter.cpp?rev=189758&r1=189757&r2=189758&view=diff
==============================================================================
--- cfe/trunk/lib/Format/ContinuationIndenter.cpp (original)
+++ cfe/trunk/lib/Format/ContinuationIndenter.cpp Mon Sep 2 08:58:14 2013
@@ -583,23 +583,16 @@ unsigned ContinuationIndenter::moveState
unsigned
ContinuationIndenter::addMultilineStringLiteral(const FormatToken &Current,
LineState &State) {
- StringRef Text = Current.TokenText;
- // We can only affect layout of the first and the last line, so the penalty
- // for all other lines is constant, and we ignore it.
- size_t FirstLineBreak = Text.find('\n');
- size_t LastLineBreak = Text.find_last_of('\n');
- assert(FirstLineBreak != StringRef::npos);
- unsigned StartColumn = State.Column - Current.CodePointCount;
- State.Column =
- encoding::getCodePointCount(Text.substr(LastLineBreak + 1), Encoding);
-
// Break before further function parameters on all levels.
for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
State.Stack[i].BreakBeforeParameter = true;
unsigned ColumnsUsed =
- StartColumn +
- encoding::getCodePointCount(Text.substr(0, FirstLineBreak), Encoding);
+ State.Column - Current.CodePointCount + Current.CodePointsInFirstLine;
+ // We can only affect layout of the first and the last line, so the penalty
+ // for all other lines is constant, and we ignore it.
+ State.Column = Current.CodePointsInLastLine;
+
if (ColumnsUsed > getColumnLimit())
return Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit());
return 0;
@@ -619,7 +612,7 @@ unsigned ContinuationIndenter::breakProt
// Don't break string literals with (in case of non-raw strings, escaped)
// newlines. As clang-format must not change the string's content, it is
// unlikely that we'll end up with a better format.
- if (Current.IsMultiline)
+ if (Current.isMultiline())
return addMultilineStringLiteral(Current, State);
// Only break up default narrow strings.
@@ -649,14 +642,8 @@ unsigned ContinuationIndenter::breakProt
// FIXME: If we want to handle them correctly, we'll need to adjust
// leading whitespace in consecutive lines when changing indentation of
// the first line similar to what we do with block comments.
- if (Current.IsMultiline) {
- StringRef::size_type EscapedNewlinePos = Current.TokenText.find("\\\n");
- assert(EscapedNewlinePos != StringRef::npos);
- State.Column =
- StartColumn +
- encoding::getCodePointCount(
- Current.TokenText.substr(0, EscapedNewlinePos), Encoding) +
- 1;
+ if (Current.isMultiline()) {
+ State.Column = StartColumn + Current.CodePointsInFirstLine;
return 0;
}
@@ -740,7 +727,7 @@ bool ContinuationIndenter::NextIsMultili
// AlwaysBreakBeforeMultilineStrings implementation.
if (Current.TokenText.startswith("R\""))
return false;
- if (Current.IsMultiline)
+ if (Current.isMultiline())
return true;
if (Current.getNextNonComment() &&
Current.getNextNonComment()->is(tok::string_literal))
Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=189758&r1=189757&r2=189758&view=diff
==============================================================================
--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Mon Sep 2 08:58:14 2013
@@ -596,10 +596,16 @@ private:
FormatTok->CodePointCount =
encoding::getCodePointCount(FormatTok->TokenText, Encoding);
- if (FormatTok->isOneOf(tok::string_literal, tok::comment) &&
- FormatTok->TokenText.find('\n') != StringRef::npos)
- FormatTok->IsMultiline = true;
-
+ if (FormatTok->isOneOf(tok::string_literal, tok::comment)) {
+ StringRef Text = FormatTok->TokenText;
+ size_t FirstNewlinePos = Text.find('\n');
+ if (FirstNewlinePos != StringRef::npos) {
+ FormatTok->CodePointsInFirstLine = encoding::getCodePointCount(
+ Text.substr(0, FirstNewlinePos), Encoding);
+ FormatTok->CodePointsInLastLine = encoding::getCodePointCount(
+ Text.substr(Text.find_last_of('\n') + 1), Encoding);
+ }
+ }
// FIXME: Add the CodePointCount to Column.
FormatTok->WhitespaceRange = SourceRange(
WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
Modified: cfe/trunk/lib/Format/FormatToken.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/FormatToken.h?rev=189758&r1=189757&r2=189758&view=diff
==============================================================================
--- cfe/trunk/lib/Format/FormatToken.h (original)
+++ cfe/trunk/lib/Format/FormatToken.h Mon Sep 2 08:58:14 2013
@@ -80,9 +80,9 @@ class TokenRole;
/// whitespace characters preceding it.
struct FormatToken {
FormatToken()
- : NewlinesBefore(0), HasUnescapedNewline(false), IsMultiline(false),
- LastNewlineOffset(0), CodePointCount(0), IsFirst(false),
- MustBreakBefore(false), IsUnterminatedLiteral(false),
+ : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
+ CodePointCount(0), CodePointsInFirstLine(0), CodePointsInLastLine(0),
+ IsFirst(false), MustBreakBefore(false), IsUnterminatedLiteral(false),
BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0),
CanBreakBefore(false), ClosesTemplateDeclaration(false),
ParameterCount(0), PackingKind(PPK_Inconclusive), TotalLength(0),
@@ -104,9 +104,6 @@ struct FormatToken {
/// Token.
bool HasUnescapedNewline;
- /// \brief Whether the token text contains newlines (escaped or not).
- bool IsMultiline;
-
/// \brief The range of the whitespace immediately preceding the \c Token.
SourceRange WhitespaceRange;
@@ -118,6 +115,19 @@ struct FormatToken {
/// We need this to correctly measure number of columns a token spans.
unsigned CodePointCount;
+ /// \brief Contains the number of code points in the first line of a
+ /// multi-line string literal or comment. Zero if there's no newline in the
+ /// token.
+ unsigned CodePointsInFirstLine;
+
+ /// \brief Contains the number of code points in the last line of a
+ /// multi-line string literal or comment. Can be zero for line comments.
+ unsigned CodePointsInLastLine;
+
+ /// \brief Returns \c true if the token text contains newlines (escaped or
+ /// not).
+ bool isMultiline() const { return CodePointsInFirstLine != 0; }
+
/// \brief Indicates that this is the first token.
bool IsFirst;
Modified: cfe/trunk/lib/Format/TokenAnnotator.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/TokenAnnotator.cpp?rev=189758&r1=189757&r2=189758&view=diff
==============================================================================
--- cfe/trunk/lib/Format/TokenAnnotator.cpp (original)
+++ cfe/trunk/lib/Format/TokenAnnotator.cpp Mon Sep 2 08:58:14 2013
@@ -1025,7 +1025,7 @@ void TokenAnnotator::calculateFormatting
Current->CanBreakBefore =
Current->MustBreakBefore || canBreakBefore(Line, *Current);
if (Current->MustBreakBefore ||
- (Current->is(tok::string_literal) && Current->IsMultiline))
+ (Current->is(tok::string_literal) && Current->isMultiline()))
Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit;
else
Current->TotalLength = Current->Previous->TotalLength +
More information about the cfe-commits mailing list