r183978 - Don't remove backslashes from block comments.

Alexander Kornienko alexfh at google.com
Fri Jun 14 04:46:11 PDT 2013


Author: alexfh
Date: Fri Jun 14 06:46:10 2013
New Revision: 183978

URL: http://llvm.org/viewvc/llvm-project?rev=183978&view=rev
Log:
Don't remove backslashes from block comments.

Summary:
Don't remove backslashes from block comments. Previously this
/* \    \ \ \ \ \
*/
would be turned to this:
/*
*/
which spoils some kinds of ASCII-art, people use in their comments. The behavior
was related to handling escaped newlines in block comments inside preprocessor
directives. This patch makes handling it in a more civilized way.

Reviewers: klimek

Reviewed By: klimek

CC: cfe-commits

Differential Revision: http://llvm-reviews.chandlerc.com/D979

Modified:
    cfe/trunk/lib/Format/BreakableToken.cpp
    cfe/trunk/lib/Format/BreakableToken.h
    cfe/trunk/lib/Format/Format.cpp
    cfe/trunk/unittests/Format/FormatTest.cpp

Modified: cfe/trunk/lib/Format/BreakableToken.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.cpp?rev=183978&r1=183977&r2=183978&view=diff
==============================================================================
--- cfe/trunk/lib/Format/BreakableToken.cpp (original)
+++ cfe/trunk/lib/Format/BreakableToken.cpp Fri Jun 14 06:46:10 2013
@@ -119,13 +119,11 @@ unsigned BreakableSingleLineToken::getLi
          encoding::getCodePointCount(Line.substr(Offset, Length), Encoding);
 }
 
-BreakableSingleLineToken::BreakableSingleLineToken(const FormatToken &Tok,
-                                                   unsigned StartColumn,
-                                                   StringRef Prefix,
-                                                   StringRef Postfix,
-                                                   encoding::Encoding Encoding)
-    : BreakableToken(Tok, Encoding), StartColumn(StartColumn), Prefix(Prefix),
-      Postfix(Postfix) {
+BreakableSingleLineToken::BreakableSingleLineToken(
+    const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
+    StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding)
+    : BreakableToken(Tok, InPPDirective, Encoding), StartColumn(StartColumn),
+      Prefix(Prefix), Postfix(Postfix) {
   assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
   Line = Tok.TokenText.substr(
       Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
@@ -133,8 +131,10 @@ BreakableSingleLineToken::BreakableSingl
 
 BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok,
                                                unsigned StartColumn,
+                                               bool InPPDirective,
                                                encoding::Encoding Encoding)
-    : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"", Encoding) {}
+    : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"", InPPDirective,
+                               Encoding) {}
 
 BreakableToken::Split
 BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
@@ -145,7 +145,6 @@ BreakableStringLiteral::getSplit(unsigne
 
 void BreakableStringLiteral::insertBreak(unsigned LineIndex,
                                          unsigned TailOffset, Split Split,
-                                         bool InPPDirective,
                                          WhitespaceManager &Whitespaces) {
   Whitespaces.replaceWhitespaceInToken(
       Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
@@ -162,10 +161,11 @@ static StringRef getLineCommentPrefix(St
 
 BreakableLineComment::BreakableLineComment(const FormatToken &Token,
                                            unsigned StartColumn,
+                                           bool InPPDirective,
                                            encoding::Encoding Encoding)
     : BreakableSingleLineToken(Token, StartColumn,
                                getLineCommentPrefix(Token.TokenText), "",
-                               Encoding) {
+                               InPPDirective, Encoding) {
   OriginalPrefix = Prefix;
   if (Token.TokenText.size() > Prefix.size() &&
       isAlphanumeric(Token.TokenText[Prefix.size()])) {
@@ -184,7 +184,7 @@ BreakableLineComment::getSplit(unsigned
 }
 
 void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
-                                       Split Split, bool InPPDirective,
+                                       Split Split,
                                        WhitespaceManager &Whitespaces) {
   Whitespaces.replaceWhitespaceInToken(
       Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second,
@@ -193,7 +193,6 @@ void BreakableLineComment::insertBreak(u
 
 void
 BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex,
-                                              unsigned InPPDirective,
                                               WhitespaceManager &Whitespaces) {
   if (OriginalPrefix != Prefix) {
     Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "",
@@ -203,8 +202,9 @@ BreakableLineComment::replaceWhitespaceB
 
 BreakableBlockComment::BreakableBlockComment(
     const FormatStyle &Style, const FormatToken &Token, unsigned StartColumn,
-    unsigned OriginalStartColumn, bool FirstInLine, encoding::Encoding Encoding)
-    : BreakableToken(Token, Encoding) {
+    unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
+    encoding::Encoding Encoding)
+    : BreakableToken(Token, InPPDirective, Encoding) {
   StringRef TokenText(Token.TokenText);
   assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
   TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
@@ -264,8 +264,18 @@ BreakableBlockComment::BreakableBlockCom
 void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
                                              unsigned LineIndex,
                                              int IndentDelta) {
+  // When in a preprocessor directive, the trailing backslash in a block comment
+  // is not needed, but can serve a purpose of uniformity with necessary escaped
+  // newlines outside the comment. In this case we remove it here before
+  // trimming the trailing whitespace. The backslash will be re-added later when
+  // inserting a line break.
+  size_t EndOfPreviousLine = Lines[LineIndex - 1].size();
+  if (InPPDirective && Lines[LineIndex - 1].endswith("\\"))
+    --EndOfPreviousLine;
+
   // Calculate the end of the non-whitespace text in the previous line.
-  size_t EndOfPreviousLine = Lines[LineIndex - 1].find_last_not_of(" \\\t");
+  EndOfPreviousLine =
+      Lines[LineIndex - 1].find_last_not_of(" \t", EndOfPreviousLine);
   if (EndOfPreviousLine == StringRef::npos)
     EndOfPreviousLine = 0;
   else
@@ -313,7 +323,7 @@ BreakableBlockComment::getSplit(unsigned
 }
 
 void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
-                                        Split Split, bool InPPDirective,
+                                        Split Split,
                                         WhitespaceManager &Whitespaces) {
   StringRef Text = Lines[LineIndex].substr(TailOffset);
   StringRef Prefix = Decoration;
@@ -334,7 +344,6 @@ void BreakableBlockComment::insertBreak(
 
 void
 BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex,
-                                               unsigned InPPDirective,
                                                WhitespaceManager &Whitespaces) {
   if (LineIndex == 0)
     return;

Modified: cfe/trunk/lib/Format/BreakableToken.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.h?rev=183978&r1=183977&r2=183978&view=diff
==============================================================================
--- cfe/trunk/lib/Format/BreakableToken.h (original)
+++ cfe/trunk/lib/Format/BreakableToken.h Fri Jun 14 06:46:10 2013
@@ -59,20 +59,20 @@ public:
 
   /// \brief Emits the previously retrieved \p Split via \p Whitespaces.
   virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
-                           bool InPPDirective,
                            WhitespaceManager &Whitespaces) = 0;
 
   /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
   virtual void replaceWhitespaceBefore(unsigned LineIndex,
-                                       unsigned InPPDirective,
                                        WhitespaceManager &Whitespaces) {}
 
 protected:
-  BreakableToken(const FormatToken &Tok, encoding::Encoding Encoding)
-      : Tok(Tok), Encoding(Encoding) {}
+  BreakableToken(const FormatToken &Tok, bool InPPDirective,
+                 encoding::Encoding Encoding)
+      : Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding) {}
 
   const FormatToken &Tok;
-  encoding::Encoding Encoding;
+  const bool InPPDirective;
+  const encoding::Encoding Encoding;
 };
 
 /// \brief Base class for single line tokens that can be broken.
@@ -88,7 +88,7 @@ public:
 protected:
   BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn,
                            StringRef Prefix, StringRef Postfix,
-                           encoding::Encoding Encoding);
+                           bool InPPDirective, encoding::Encoding Encoding);
 
   // The column in which the token starts.
   unsigned StartColumn;
@@ -107,12 +107,11 @@ public:
   /// \p StartColumn specifies the column in which the token will start
   /// after formatting.
   BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn,
-                         encoding::Encoding Encoding);
+                         bool InPPDirective, encoding::Encoding Encoding);
 
   virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
                          unsigned ColumnLimit) const;
   virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
-                           bool InPPDirective,
                            WhitespaceManager &Whitespaces);
 };
 
@@ -123,14 +122,13 @@ public:
   /// \p StartColumn specifies the column in which the comment will start
   /// after formatting.
   BreakableLineComment(const FormatToken &Token, unsigned StartColumn,
-                       encoding::Encoding Encoding);
+                       bool InPPDirective, encoding::Encoding Encoding);
 
   virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
                          unsigned ColumnLimit) const;
   virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
-                           bool InPPDirective, WhitespaceManager &Whitespaces);
+                           WhitespaceManager &Whitespaces);
   virtual void replaceWhitespaceBefore(unsigned LineIndex,
-                                       unsigned InPPDirective,
                                        WhitespaceManager &Whitespaces);
 
 private:
@@ -148,7 +146,8 @@ public:
   /// If the comment starts a line after formatting, set \p FirstInLine to true.
   BreakableBlockComment(const FormatStyle &Style, const FormatToken &Token,
                         unsigned StartColumn, unsigned OriginaStartColumn,
-                        bool FirstInLine, encoding::Encoding Encoding);
+                        bool FirstInLine, bool InPPDirective,
+                        encoding::Encoding Encoding);
 
   virtual unsigned getLineCount() const;
   virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
@@ -157,9 +156,8 @@ public:
   virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
                          unsigned ColumnLimit) const;
   virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
-                           bool InPPDirective, WhitespaceManager &Whitespaces);
+                           WhitespaceManager &Whitespaces);
   virtual void replaceWhitespaceBefore(unsigned LineIndex,
-                                       unsigned InPPDirective,
                                        WhitespaceManager &Whitespaces);
 
 private:

Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=183978&r1=183977&r2=183978&view=diff
==============================================================================
--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Fri Jun 14 06:46:10 2013
@@ -804,7 +804,6 @@ private:
   /// cover the cost of the additional line breaks.
   unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
                                 bool DryRun) {
-    unsigned UnbreakableTailLength = Current.UnbreakableTailLength;
     llvm::OwningPtr<BreakableToken> Token;
     unsigned StartColumn = State.Column - Current.CodePointCount;
     unsigned OriginalStartColumn =
@@ -814,37 +813,34 @@ private:
     if (Current.is(tok::string_literal) &&
         Current.Type != TT_ImplicitStringLiteral) {
       // Only break up default narrow strings.
-      const char *LiteralData =
-          SourceMgr.getCharacterData(Current.getStartOfNonWhitespace());
-      if (!LiteralData || *LiteralData != '"')
+      if (!Current.TokenText.startswith("\""))
         return 0;
 
-      Token.reset(new BreakableStringLiteral(Current, StartColumn, Encoding));
+      Token.reset(new BreakableStringLiteral(Current, StartColumn,
+                                             Line.InPPDirective, Encoding));
     } else if (Current.Type == TT_BlockComment) {
-      BreakableBlockComment *BBC = new BreakableBlockComment(
+      Token.reset(new BreakableBlockComment(
           Style, Current, StartColumn, OriginalStartColumn, !Current.Previous,
-          Encoding);
-      Token.reset(BBC);
+          Line.InPPDirective, Encoding));
     } else if (Current.Type == TT_LineComment &&
                (Current.Previous == NULL ||
                 Current.Previous->Type != TT_ImplicitStringLiteral)) {
-      Token.reset(new BreakableLineComment(Current, StartColumn, Encoding));
+      Token.reset(new BreakableLineComment(Current, StartColumn,
+                                           Line.InPPDirective, Encoding));
     } else {
       return 0;
     }
-    if (UnbreakableTailLength >= getColumnLimit())
+    if (Current.UnbreakableTailLength >= getColumnLimit())
       return 0;
-    unsigned RemainingSpace = getColumnLimit() - UnbreakableTailLength;
+    unsigned RemainingSpace = getColumnLimit() - Current.UnbreakableTailLength;
 
     bool BreakInserted = false;
     unsigned Penalty = 0;
     unsigned PositionAfterLastLineInToken = 0;
     for (unsigned LineIndex = 0, EndIndex = Token->getLineCount();
          LineIndex != EndIndex; ++LineIndex) {
-      if (!DryRun) {
-        Token->replaceWhitespaceBefore(LineIndex, Line.InPPDirective,
-                                       Whitespaces);
-      }
+      if (!DryRun)
+        Token->replaceWhitespaceBefore(LineIndex, Whitespaces);
       unsigned TailOffset = 0;
       unsigned RemainingTokenColumns = Token->getLineLengthAfterSplit(
           LineIndex, TailOffset, StringRef::npos);
@@ -858,10 +854,8 @@ private:
             LineIndex, TailOffset + Split.first + Split.second,
             StringRef::npos);
         assert(NewRemainingTokenColumns < RemainingTokenColumns);
-        if (!DryRun) {
-          Token->insertBreak(LineIndex, TailOffset, Split, Line.InPPDirective,
-                             Whitespaces);
-        }
+        if (!DryRun)
+          Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces);
         Penalty += Current.is(tok::string_literal) ? Style.PenaltyBreakString
                                                    : Style.PenaltyBreakComment;
         unsigned ColumnsUsed =

Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=183978&r1=183977&r2=183978&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Fri Jun 14 06:46:10 2013
@@ -1961,6 +1961,10 @@ TEST_F(FormatTest, EscapedNewlineAtStart
   EXPECT_EQ("template <class T> f();", format("\\\ntemplate <class T> f();"));
 }
 
+TEST_F(FormatTest, NoEscapedNewlineHandlingInBlockComments) {
+  EXPECT_EQ("/* \\  \\  \\\n*/", format("\\\n/* \\  \\  \\\n*/"));
+}
+
 TEST_F(FormatTest, CalculateSpaceOnConsecutiveLinesInMacro) {
   verifyFormat("#define A \\\n"
                "  int v(  \\\n"
@@ -4605,6 +4609,19 @@ TEST_F(FormatTest, BreakStringLiterals)
       format("#define A \"some text other\";", AlignLeft));
 }
 
+TEST_F(FormatTest, SkipsUnknownStringLiterals) {
+  EXPECT_EQ("u8\"unsupported literal\";",
+            format("u8\"unsupported literal\";", getLLVMStyleWithColumns(15)));
+  EXPECT_EQ("u\"unsupported literal\";",
+            format("u\"unsupported literal\";", getLLVMStyleWithColumns(15)));
+  EXPECT_EQ("U\"unsupported literal\";",
+            format("U\"unsupported literal\";", getLLVMStyleWithColumns(15)));
+  EXPECT_EQ("L\"unsupported literal\";",
+            format("L\"unsupported literal\";", getLLVMStyleWithColumns(15)));
+  EXPECT_EQ("R\"x(raw literal)x\";",
+            format("R\"x(raw literal)x\";", getLLVMStyleWithColumns(15)));
+}
+
 TEST_F(FormatTest, BreakStringLiteralsBeforeUnbreakableTokenSequence) {
   EXPECT_EQ("someFunction(\"aaabbbcccd\"\n"
             "             \"ddeeefff\");",





More information about the cfe-commits mailing list