[PATCH] Store first and last newline position in the token text for string literals and comments.

Fri Aug 30 04:04:58 PDT 2013

Hi djasper,

Store the positions of the first and last newline in the token text for string
literals and comments, to avoid calling .find('\n') on the text for each
possible solution.

http://llvm-reviews.chandlerc.com/D1556

Files:
  lib/Format/ContinuationIndenter.cpp
  lib/Format/Format.cpp
  lib/Format/FormatToken.h
  lib/Format/TokenAnnotator.cpp

Index: lib/Format/ContinuationIndenter.cpp
===================================================================

--- lib/Format/ContinuationIndenter.cpp
+++ lib/Format/ContinuationIndenter.cpp
@@ -583,23 +583,19 @@
 unsigned
 ContinuationIndenter::addMultilineStringLiteral(const FormatToken &Current,
                                                 LineState &State) {
-  StringRef Text = Current.TokenText;
+  unsigned StartColumn = State.Column - Current.CodePointCount;
   // We can only affect layout of the first and the last line, so the penalty
   // for all other lines is constant, and we ignore it.
-  size_t FirstLineBreak = Text.find('\n');
-  size_t LastLineBreak = Text.find_last_of('\n');
-  assert(FirstLineBreak != StringRef::npos);
-  unsigned StartColumn = State.Column - Current.CodePointCount;
-  State.Column =
-      encoding::getCodePointCount(Text.substr(LastLineBreak + 1), Encoding);
+  StringRef LastLine = Current.TokenText.substr(Current.LastNewlineInText + 1);
+  State.Column = encoding::getCodePointCount(LastLine, Encoding);
 
   // Break before further function parameters on all levels.
   for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
     State.Stack[i].BreakBeforeParameter = true;
 
+  StringRef FirstLine = Current.TokenText.substr(0, Current.FirstNewlineInText);
   unsigned ColumnsUsed =
-      StartColumn +
-      encoding::getCodePointCount(Text.substr(0, FirstLineBreak), Encoding);
+      StartColumn + encoding::getCodePointCount(FirstLine, Encoding);
   if (ColumnsUsed > getColumnLimit())
     return Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit());
   return 0;
@@ -619,7 +615,7 @@
     // Don't break string literals with (in case of non-raw strings, escaped)
     // newlines. As clang-format must not change the string's content, it is
     // unlikely that we'll end up with a better format.
-    if (Current.IsMultiline)
+    if (Current.isMultiline())
       return addMultilineStringLiteral(Current, State);
 
     // Only break up default narrow strings.
@@ -649,7 +645,7 @@
     // FIXME: If we want to handle them correctly, we'll need to adjust
     // leading whitespace in consecutive lines when changing indentation of
     // the first line similar to what we do with block comments.
-    if (Current.IsMultiline) {
+    if (Current.isMultiline()) {
       StringRef::size_type EscapedNewlinePos = Current.TokenText.find("\\\n");
       assert(EscapedNewlinePos != StringRef::npos);
       State.Column =
@@ -740,7 +736,7 @@
   // AlwaysBreakBeforeMultilineStrings implementation.
   if (Current.TokenText.startswith("R\""))
     return false;
-  if (Current.IsMultiline)
+  if (Current.isMultiline())
     return true;
   if (Current.getNextNonComment() &&
       Current.getNextNonComment()->is(tok::string_literal))
Index: lib/Format/Format.cpp
===================================================================
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -596,10 +596,10 @@
     FormatTok->CodePointCount =
         encoding::getCodePointCount(FormatTok->TokenText, Encoding);
 
-    if (FormatTok->isOneOf(tok::string_literal, tok::comment) &&
-        FormatTok->TokenText.find('\n') != StringRef::npos)
-      FormatTok->IsMultiline = true;
-
+    if (FormatTok->isOneOf(tok::string_literal, tok::comment)) {
+        FormatTok->FirstNewlineInText = FormatTok->TokenText.find('\n');
+        FormatTok->LastNewlineInText = FormatTok->TokenText.find_last_of('\n');
+    }
     // FIXME: Add the CodePointCount to Column.
     FormatTok->WhitespaceRange = SourceRange(
         WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
Index: lib/Format/FormatToken.h
===================================================================
--- lib/Format/FormatToken.h
+++ lib/Format/FormatToken.h
@@ -80,7 +80,8 @@
 /// whitespace characters preceeding it.
 struct FormatToken {
   FormatToken()
-      : NewlinesBefore(0), HasUnescapedNewline(false), IsMultiline(false),
+      : NewlinesBefore(0), HasUnescapedNewline(false),
+        FirstNewlineInText(StringRef::npos), LastNewlineInText(StringRef::npos),
         LastNewlineOffset(0), CodePointCount(0), IsFirst(false),
         MustBreakBefore(false), IsUnterminatedLiteral(false),
         BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0),
@@ -104,8 +105,18 @@
   /// Token.
   bool HasUnescapedNewline;
 
+  /// \brief Contains a position of the first newline character (escaped or not)
+  /// in the token text or StringRef::npos when the text doesn't contain
+  /// newlines. Is filled for string literals and comments.
+  size_t FirstNewlineInText;
+
+  /// \brief Contains a position of the last newline character (escaped or not)
+  /// in the token text or StringRef::npos when the text doesn't contain
+  /// newlines. Is filled for string literals and comments.
+  size_t LastNewlineInText;
+
   /// \brief Whether the token text contains newlines (escaped or not).
-  bool IsMultiline;
+  bool isMultiline() const { return FirstNewlineInText != StringRef::npos; }
 
   /// \brief The range of the whitespace immediately preceeding the \c Token.
   SourceRange WhitespaceRange;
Index: lib/Format/TokenAnnotator.cpp
===================================================================
--- lib/Format/TokenAnnotator.cpp
+++ lib/Format/TokenAnnotator.cpp
@@ -1024,7 +1024,7 @@
     Current->CanBreakBefore =
         Current->MustBreakBefore || canBreakBefore(Line, *Current);
     if (Current->MustBreakBefore ||
-        (Current->is(tok::string_literal) && Current->IsMultiline))
+        (Current->is(tok::string_literal) && Current->isMultiline()))
       Current->TotalLength = Current->Previous->TotalLength + Style.ColumnLimit;
     else
       Current->TotalLength = Current->Previous->TotalLength +
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D1556.1.patch
Type: text/x-patch
Size: 5809 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130830/2baee1c7/attachment.bin>