r190395 - Calculate and store ColumnWidth instead of CodePointCount in FormatTokens.
Alexander Kornienko
alexfh at google.com
Tue Sep 10 02:38:25 PDT 2013
Author: alexfh
Date: Tue Sep 10 04:38:25 2013
New Revision: 190395
URL: http://llvm.org/viewvc/llvm-project?rev=190395&view=rev
Log:
Calculate and store ColumnWidth instead of CodePointCount in FormatTokens.
Summary:
This fixes various issues with mixed tabs and spaces handling, e.g.
when realigning block comments.
Reviewers: klimek, djasper
Reviewed By: djasper
CC: cfe-commits
Differential Revision: http://llvm-reviews.chandlerc.com/D1608
Modified:
cfe/trunk/lib/Format/BreakableToken.cpp
cfe/trunk/lib/Format/ContinuationIndenter.cpp
cfe/trunk/lib/Format/Format.cpp
cfe/trunk/lib/Format/FormatToken.cpp
cfe/trunk/lib/Format/FormatToken.h
cfe/trunk/lib/Format/TokenAnnotator.cpp
cfe/trunk/unittests/Format/FormatTest.cpp
Modified: cfe/trunk/lib/Format/BreakableToken.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.cpp?rev=190395&r1=190394&r2=190395&view=diff
==============================================================================
--- cfe/trunk/lib/Format/BreakableToken.cpp (original)
+++ cfe/trunk/lib/Format/BreakableToken.cpp Tue Sep 10 04:38:25 2013
@@ -337,8 +337,10 @@ void BreakableBlockComment::adjustWhites
// if leading tabs are intermixed with spaces, that is not a high priority.
// Adjust the start column uniformly accross all lines.
- StartOfLineColumn[LineIndex] =
- std::max<int>(0, Whitespace.size() + IndentDelta);
+ StartOfLineColumn[LineIndex] = std::max<int>(
+ 0,
+ encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) +
+ IndentDelta);
}
unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }
Modified: cfe/trunk/lib/Format/ContinuationIndenter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/ContinuationIndenter.cpp?rev=190395&r1=190394&r2=190395&view=diff
==============================================================================
--- cfe/trunk/lib/Format/ContinuationIndenter.cpp (original)
+++ cfe/trunk/lib/Format/ContinuationIndenter.cpp Tue Sep 10 04:38:25 2013
@@ -201,7 +201,7 @@ unsigned ContinuationIndenter::addTokenT
State.NextToken->WhitespaceRange.getEnd()) -
SourceMgr.getSpellingColumnNumber(
State.NextToken->WhitespaceRange.getBegin());
- State.Column += WhitespaceLength + State.NextToken->CodePointCount;
+ State.Column += WhitespaceLength + State.NextToken->ColumnWidth;
State.NextToken = State.NextToken->Next;
return 0;
}
@@ -257,11 +257,11 @@ unsigned ContinuationIndenter::addTokenT
State.Line->StartsDefinition))) {
State.Column = State.Stack.back().Indent;
} else if (Current.Type == TT_ObjCSelectorName) {
- if (State.Stack.back().ColonPos > Current.CodePointCount) {
- State.Column = State.Stack.back().ColonPos - Current.CodePointCount;
+ if (State.Stack.back().ColonPos > Current.ColumnWidth) {
+ State.Column = State.Stack.back().ColonPos - Current.ColumnWidth;
} else {
State.Column = State.Stack.back().Indent;
- State.Stack.back().ColonPos = State.Column + Current.CodePointCount;
+ State.Stack.back().ColonPos = State.Column + Current.ColumnWidth;
}
} else if (Current.is(tok::l_square) && Current.Type != TT_ObjCMethodExpr &&
Current.Type != TT_LambdaLSquare) {
@@ -307,7 +307,7 @@ unsigned ContinuationIndenter::addTokenT
if (!Current.isTrailingComment())
State.Stack.back().LastSpace = State.Column;
if (Current.isMemberAccess())
- State.Stack.back().LastSpace += Current.CodePointCount;
+ State.Stack.back().LastSpace += Current.ColumnWidth;
State.StartOfLineLevel = State.ParenLevel;
State.LowestLevelOnLine = State.ParenLevel;
@@ -343,8 +343,8 @@ unsigned ContinuationIndenter::addTokenT
State.Stack.back().VariablePos = State.Column;
// Move over * and & if they are bound to the variable name.
const FormatToken *Tok = &Previous;
- while (Tok && State.Stack.back().VariablePos >= Tok->CodePointCount) {
- State.Stack.back().VariablePos -= Tok->CodePointCount;
+ while (Tok && State.Stack.back().VariablePos >= Tok->ColumnWidth) {
+ State.Stack.back().VariablePos -= Tok->ColumnWidth;
if (Tok->SpacesRequiredBefore != 0)
break;
Tok = Tok->Previous;
@@ -361,12 +361,12 @@ unsigned ContinuationIndenter::addTokenT
if (Current.Type == TT_ObjCSelectorName &&
State.Stack.back().ColonPos == 0) {
if (State.Stack.back().Indent + Current.LongestObjCSelectorName >
- State.Column + Spaces + Current.CodePointCount)
+ State.Column + Spaces + Current.ColumnWidth)
State.Stack.back().ColonPos =
State.Stack.back().Indent + Current.LongestObjCSelectorName;
else
State.Stack.back().ColonPos =
- State.Column + Spaces + Current.CodePointCount;
+ State.Column + Spaces + Current.ColumnWidth;
}
if (Previous.opensScope() && Previous.Type != TT_ObjCMethodExpr &&
@@ -436,7 +436,7 @@ unsigned ContinuationIndenter::moveState
std::min(State.LowestLevelOnLine, State.ParenLevel);
if (Current.isMemberAccess())
State.Stack.back().StartOfFunctionCall =
- Current.LastInChainOfCalls ? 0 : State.Column + Current.CodePointCount;
+ Current.LastInChainOfCalls ? 0 : State.Column + Current.ColumnWidth;
if (Current.Type == TT_CtorInitializerColon) {
// Indent 2 from the column, so:
// SomeClass::SomeClass()
@@ -592,7 +592,7 @@ unsigned ContinuationIndenter::moveState
State.StartOfStringLiteral = 0;
}
- State.Column += Current.CodePointCount;
+ State.Column += Current.ColumnWidth;
State.NextToken = State.NextToken->Next;
unsigned Penalty = breakProtrudingToken(Current, State, DryRun);
if (State.Column > getColumnLimit(State)) {
@@ -618,8 +618,7 @@ ContinuationIndenter::addMultilineString
for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
State.Stack[i].BreakBeforeParameter = true;
- unsigned ColumnsUsed =
- State.Column - Current.CodePointCount + Current.FirstLineColumnWidth;
+ unsigned ColumnsUsed = State.Column;
// We can only affect layout of the first and the last line, so the penalty
// for all other lines is constant, and we ignore it.
State.Column = Current.LastLineColumnWidth;
@@ -636,14 +635,14 @@ unsigned ContinuationIndenter::breakProt
return 0;
llvm::OwningPtr<BreakableToken> Token;
- unsigned StartColumn = State.Column - Current.CodePointCount;
+ unsigned StartColumn = State.Column - Current.ColumnWidth;
if (Current.is(tok::string_literal) &&
Current.Type != TT_ImplicitStringLiteral) {
// Don't break string literals with (in case of non-raw strings, escaped)
// newlines. As clang-format must not change the string's content, it is
// unlikely that we'll end up with a better format.
- if (Current.isMultiline())
+ if (Current.IsMultiline)
return addMultilineStringLiteral(Current, State);
// Only break up default narrow strings.
@@ -657,11 +656,8 @@ unsigned ContinuationIndenter::breakProt
Token.reset(new BreakableStringLiteral(
Current, StartColumn, State.Line->InPPDirective, Encoding, Style));
} else if (Current.Type == TT_BlockComment && Current.isTrailingComment()) {
- unsigned OriginalStartColumn =
- SourceMgr.getSpellingColumnNumber(Current.getStartOfNonWhitespace()) -
- 1;
Token.reset(new BreakableBlockComment(
- Current, StartColumn, OriginalStartColumn, !Current.Previous,
+ Current, StartColumn, Current.OriginalColumn, !Current.Previous,
State.Line->InPPDirective, Encoding, Style));
} else if (Current.Type == TT_LineComment &&
(Current.Previous == NULL ||
@@ -673,10 +669,8 @@ unsigned ContinuationIndenter::breakProt
// FIXME: If we want to handle them correctly, we'll need to adjust
// leading whitespace in consecutive lines when changing indentation of
// the first line similar to what we do with block comments.
- if (Current.isMultiline()) {
- State.Column = StartColumn + Current.FirstLineColumnWidth;
+ if (Current.IsMultiline)
return 0;
- }
Token.reset(new BreakableLineComment(
Current, StartColumn, State.Line->InPPDirective, Encoding, Style));
@@ -759,12 +753,12 @@ bool ContinuationIndenter::NextIsMultili
// AlwaysBreakBeforeMultilineStrings implementation.
if (Current.TokenText.startswith("R\""))
return false;
- if (Current.isMultiline())
+ if (Current.IsMultiline)
return true;
if (Current.getNextNonComment() &&
Current.getNextNonComment()->is(tok::string_literal))
return true; // Implicit concatenation.
- if (State.Column + Current.CodePointCount + Current.UnbreakableTailLength >
+ if (State.Column + Current.ColumnWidth + Current.UnbreakableTailLength >
Style.ColumnLimit)
return true; // String will be split.
return false;
Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=190395&r1=190394&r2=190395&view=diff
==============================================================================
--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Tue Sep 10 04:38:25 2013
@@ -610,7 +610,7 @@ private:
FormatTok->WhitespaceRange =
SourceRange(GreaterLocation, GreaterLocation);
FormatTok->TokenText = ">";
- FormatTok->CodePointCount = 1;
+ FormatTok->ColumnWidth = 1;
GreaterStashed = false;
return FormatTok;
}
@@ -666,6 +666,10 @@ private:
Column = 0;
FormatTok->TokenText = FormatTok->TokenText.substr(2);
}
+
+ FormatTok->WhitespaceRange = SourceRange(
+ WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
+
FormatTok->OriginalColumn = Column;
TrailingWhitespace = 0;
@@ -685,24 +689,29 @@ private:
}
// Now FormatTok is the next non-whitespace token.
- FormatTok->CodePointCount =
- encoding::getCodePointCount(FormatTok->TokenText, Encoding);
- if (FormatTok->isOneOf(tok::string_literal, tok::comment)) {
- StringRef Text = FormatTok->TokenText;
- size_t FirstNewlinePos = Text.find('\n');
- if (FirstNewlinePos != StringRef::npos) {
- // FIXME: Handle embedded tabs.
- FormatTok->FirstLineColumnWidth = encoding::columnWidthWithTabs(
- Text.substr(0, FirstNewlinePos), 0, Style.TabWidth, Encoding);
- FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
- Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
- Encoding);
- }
+ StringRef Text = FormatTok->TokenText;
+ size_t FirstNewlinePos = Text.find('\n');
+ if (FirstNewlinePos != StringRef::npos) {
+ FormatTok->IsMultiline = true;
+ // The last line of the token always starts in column 0.
+ // Thus, the length can be precomputed even in the presence of tabs.
+ FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
+ Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
+ Encoding);
+ Text = Text.substr(0, FirstNewlinePos);
}
- // FIXME: Add the CodePointCount to Column.
- FormatTok->WhitespaceRange = SourceRange(
- WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
+
+ // FIXME: ColumnWidth actually depends on the start column, we need to
+ // take this into account when the token is moved.
+ FormatTok->ColumnWidth =
+ encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
+
+ // FIXME: For multi-line tokens this should be LastLineColumnWidth.
+ // For line comments this should probably be zero. But before changing,
+ // we need good tests for this.
+ Column += FormatTok->ColumnWidth;
+
return FormatTok;
}
Modified: cfe/trunk/lib/Format/FormatToken.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/FormatToken.cpp?rev=190395&r1=190394&r2=190395&view=diff
==============================================================================
--- cfe/trunk/lib/Format/FormatToken.cpp (original)
+++ cfe/trunk/lib/Format/FormatToken.cpp Tue Sep 10 04:38:25 2013
@@ -42,7 +42,7 @@ unsigned CommaSeparatedList::format(Line
// Calculate the number of code points we have to format this list. As the
// first token is already placed, we have to subtract it.
unsigned RemainingCodePoints = Style.ColumnLimit - State.Column +
- State.NextToken->Previous->CodePointCount;
+ State.NextToken->Previous->ColumnWidth;
// Find the best ColumnFormat, i.e. the best number of columns to use.
const ColumnFormat *Format = getColumnFormat(RemainingCodePoints);
@@ -82,7 +82,7 @@ unsigned CommaSeparatedList::format(Line
static unsigned CodePointsBetween(const FormatToken *Begin,
const FormatToken *End) {
assert(End->TotalLength >= Begin->TotalLength);
- return End->TotalLength - Begin->TotalLength + Begin->CodePointCount;
+ return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth;
}
void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
Modified: cfe/trunk/lib/Format/FormatToken.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/FormatToken.h?rev=190395&r1=190394&r2=190395&view=diff
==============================================================================
--- cfe/trunk/lib/Format/FormatToken.h (original)
+++ cfe/trunk/lib/Format/FormatToken.h Tue Sep 10 04:38:25 2013
@@ -83,7 +83,7 @@ class AnnotatedLine;
struct FormatToken {
FormatToken()
: NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
- CodePointCount(0), FirstLineColumnWidth(0), LastLineColumnWidth(0),
+ ColumnWidth(0), LastLineColumnWidth(0), IsMultiline(false),
IsFirst(false), MustBreakBefore(false), IsUnterminatedLiteral(false),
BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0),
CanBreakBefore(false), ClosesTemplateDeclaration(false),
@@ -114,22 +114,17 @@ struct FormatToken {
/// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
unsigned LastNewlineOffset;
- /// \brief The length of the non-whitespace parts of the token in CodePoints.
+ /// \brief The width of the non-whitespace parts of the token (or its first
+ /// line for multi-line tokens) in columns.
/// We need this to correctly measure number of columns a token spans.
- unsigned CodePointCount;
+ unsigned ColumnWidth;
- /// \brief Contains the number of code points in the first line of a
- /// multi-line string literal or comment. Zero if there's no newline in the
+ /// \brief Contains the width in columns of the last line of a multi-line
/// token.
- unsigned FirstLineColumnWidth;
-
- /// \brief Contains the number of code points in the last line of a
- /// multi-line string literal or comment. Can be zero for line comments.
unsigned LastLineColumnWidth;
- /// \brief Returns \c true if the token text contains newlines (escaped or
- /// not).
- bool isMultiline() const { return FirstLineColumnWidth != 0; }
+ /// \brief Whether the token text contains newlines (escaped or not).
+ bool IsMultiline;
/// \brief Indicates that this is the first token.
bool IsFirst;
@@ -189,12 +184,8 @@ struct FormatToken {
/// token.
unsigned TotalLength;
- /// \brief The original column of this token, including expanded tabs.
- /// The configured IndentWidth is used as tab width. Only tabs in whitespace
- /// are expanded.
- /// FIXME: This is currently only used on the first token of an unwrapped
- /// line, and the implementation is not correct for other tokens (see the
- /// FIXMEs in FormatTokenLexer::getNextToken()).
+ /// \brief The original 0-based column of this token, including expanded tabs.
+ /// The configured TabWidth is used as tab width.
unsigned OriginalColumn;
/// \brief The length of following tokens until the next natural split point,
Modified: cfe/trunk/lib/Format/TokenAnnotator.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/TokenAnnotator.cpp?rev=190395&r1=190394&r2=190395&view=diff
==============================================================================
--- cfe/trunk/lib/Format/TokenAnnotator.cpp (original)
+++ cfe/trunk/lib/Format/TokenAnnotator.cpp Tue Sep 10 04:38:25 2013
@@ -326,10 +326,10 @@ private:
Line.First->Type == TT_ObjCMethodSpecifier) {
Tok->Type = TT_ObjCMethodExpr;
Tok->Previous->Type = TT_ObjCSelectorName;
- if (Tok->Previous->CodePointCount >
+ if (Tok->Previous->ColumnWidth >
Contexts.back().LongestObjCSelectorName) {
Contexts.back().LongestObjCSelectorName =
- Tok->Previous->CodePointCount;
+ Tok->Previous->ColumnWidth;
}
if (Contexts.back().FirstObjCSelectorName == NULL)
Contexts.back().FirstObjCSelectorName = Tok->Previous;
@@ -1022,7 +1022,8 @@ void TokenAnnotator::annotate(AnnotatedL
}
void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
- Line.First->TotalLength = Line.First->CodePointCount;
+ Line.First->TotalLength =
+ Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth;
if (!Line.First->Next)
return;
FormatToken *Current = Line.First->Next;
@@ -1055,11 +1056,11 @@ void TokenAnnotator::calculateFormatting
Current->CanBreakBefore =
Current->MustBreakBefore || canBreakBefore(Line, *Current);
if (Current->MustBreakBefore || !Current->Children.empty() ||
- (Current->is(tok::string_literal) && Current->isMultiline()))
+ Current->IsMultiline)
Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit;
else
Current->TotalLength = Current->Previous->TotalLength +
- Current->CodePointCount +
+ Current->ColumnWidth +
Current->SpacesRequiredBefore;
// FIXME: Only calculate this if CanBreakBefore is true once static
// initializers etc. are sorted out.
@@ -1095,7 +1096,7 @@ void TokenAnnotator::calculateUnbreakabl
UnbreakableTailLength = 0;
} else {
UnbreakableTailLength +=
- Current->CodePointCount + Current->SpacesRequiredBefore;
+ Current->ColumnWidth + Current->SpacesRequiredBefore;
}
Current = Current->Previous;
}
Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=190395&r1=190394&r2=190395&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Tue Sep 10 04:38:25 2013
@@ -5738,33 +5738,48 @@ TEST_F(FormatTest, ConfigurableUseOfTab)
// FIXME: To correctly count mixed whitespace we need to
// also correctly count mixed whitespace in front of the comment.
- // Tab.TabWidth = 8;
- // Tab.IndentWidth = 8;
- // EXPECT_EQ("/*\n"
- // "\t a\t\tcomment\n"
- // "\t in multiple lines\n"
- // " */",
- // format(" /*\t \t \n"
- // " \t \t a\t\tcomment\t \t\n"
- // " \t \t in multiple lines\t\n"
- // " \t */",
- // Tab));
- // Tab.UseTab = false;
- // EXPECT_EQ("/*\n"
- // " a\t\tcomment\n"
- // " in multiple lines\n"
- // " */",
- // format(" /*\t \t \n"
- // " \t \t a\t\tcomment\t \t\n"
- // " \t \t in multiple lines\t\n"
- // " \t */",
- // Tab));
- // EXPECT_EQ("/* some\n"
- // " comment */",
- // format(" \t \t /* some\n"
- // " \t \t comment */",
- // Tab));
+ Tab.TabWidth = 8;
+ Tab.IndentWidth = 8;
+ EXPECT_EQ("/*\n"
+ "\t a\t\tcomment\n"
+ "\t in multiple lines\n"
+ " */",
+ format(" /*\t \t \n"
+ " \t \t a\t\tcomment\t \t\n"
+ " \t \t in multiple lines\t\n"
+ " \t */",
+ Tab));
+ Tab.UseTab = false;
+ EXPECT_EQ("/*\n"
+ " a\t\tcomment\n"
+ " in multiple lines\n"
+ " */",
+ format(" /*\t \t \n"
+ " \t \t a\t\tcomment\t \t\n"
+ " \t \t in multiple lines\t\n"
+ " \t */",
+ Tab));
+ EXPECT_EQ("/* some\n"
+ " comment */",
+ format(" \t \t /* some\n"
+ " \t \t comment */",
+ Tab));
+ EXPECT_EQ("int a; /* some\n"
+ " comment */",
+ format(" \t \t int a; /* some\n"
+ " \t \t comment */",
+ Tab));
+ EXPECT_EQ("int a; /* some\n"
+ "comment */",
+ format(" \t \t int\ta; /* some\n"
+ " \t \t comment */",
+ Tab));
+ EXPECT_EQ("f(\"\t\t\"); /* some\n"
+ " comment */",
+ format(" \t \t f(\"\t\t\"); /* some\n"
+ " \t \t comment */",
+ Tab));
EXPECT_EQ("{\n"
" /*\n"
" * Comment\n"
More information about the cfe-commits
mailing list