r176710 - Fixes breaking of string literals.

Manuel Klimek klimek at google.com
Fri Mar 8 10:59:48 PST 2013


Author: klimek
Date: Fri Mar  8 12:59:48 2013
New Revision: 176710

URL: http://llvm.org/viewvc/llvm-project?rev=176710&view=rev
Log:
Fixes breaking of string literals.

1. We now ignore all non-default string literals, including raw
literals.
2. We do not break inside escape sequences any more.

FIXME: We still break in trigraphs.

Modified:
    cfe/trunk/lib/Format/Format.cpp
    cfe/trunk/unittests/Format/FormatTest.cpp

Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=176710&r1=176709&r2=176710&view=diff
==============================================================================
--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Fri Mar  8 12:59:48 2013
@@ -729,6 +729,9 @@ private:
                                 bool DryRun) {
     if (Current.isNot(tok::string_literal))
       return 0;
+    // Only break up default narrow strings.
+    if (StringRef(Current.FormatTok.Tok.getLiteralData()).find('"') != 0)
+      return 0;
 
     unsigned Penalty = 0;
     unsigned TailOffset = 0;
@@ -774,12 +777,63 @@ private:
     StringRef::size_type SlashOffset = Text.rfind('/', Offset);
     if (SlashOffset != StringRef::npos && SlashOffset != 0)
       return SlashOffset;
-    if (Offset > 1)
+    StringRef::size_type Split = getStartOfCharacter(Text, Offset);
+    if (Split != StringRef::npos && Split > 1)
       // Do not split at 0.
-      return Offset - 1;
+      return Split - 1;
     return StringRef::npos;
   }
 
+  StringRef::size_type
+  getStartOfCharacter(StringRef Text, StringRef::size_type Offset) {
+    StringRef::size_type NextEscape = Text.find('\\');
+    while (NextEscape != StringRef::npos && NextEscape < Offset) {
+      StringRef::size_type SequenceLength =
+          getEscapeSequenceLength(Text.substr(NextEscape));
+      if (Offset < NextEscape + SequenceLength)
+        return NextEscape;
+      NextEscape = Text.find('\\', NextEscape + SequenceLength);
+    }
+    return Offset;
+  }
+
+  unsigned getEscapeSequenceLength(StringRef Text) {
+    assert(Text[0] == '\\');
+    if (Text.size() < 2)
+      return 1;
+
+    switch (Text[1]) {
+    case 'u':
+      return 6;
+    case 'U':
+      return 10;
+    case 'x':
+      return getHexLength(Text);
+    default:
+      if (Text[1] >= '0' && Text[1] <= '7')
+        return getOctalLength(Text);
+      return 2;
+    }
+  }
+
+  unsigned getHexLength(StringRef Text) {
+    unsigned I = 2; // Point after '\x'.
+    while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') ||
+                               (Text[I] >= 'a' && Text[I] <= 'f') ||
+                               (Text[I] >= 'A' && Text[I] <= 'F'))) {
+      ++I;
+    }
+    return I;
+  }
+
+  unsigned getOctalLength(StringRef Text) {
+    unsigned I = 1;
+    while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) {
+      ++I;
+    }
+    return I;
+  }
+
   unsigned getColumnLimit() {
     return Style.ColumnLimit - (Line.InPPDirective ? 2 : 0);
   }

Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=176710&r1=176709&r2=176710&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Fri Mar  8 12:59:48 2013
@@ -3108,5 +3108,44 @@ TEST_F(FormatTest, BreakStringLiterals)
       format("\"split/pathat/slashes\"", getLLVMStyleWithColumns(10)));
 }
 
+TEST_F(FormatTest, DoNotBreakStringLiteralsInEscapeSequence) {
+  EXPECT_EQ("\"\\a\"",
+            format("\"\\a\"", getLLVMStyleWithColumns(3)));
+  EXPECT_EQ("\"\\\"",
+            format("\"\\\"", getLLVMStyleWithColumns(2)));
+  EXPECT_EQ("\"test\"\n"
+            "\"\\n\"",
+            format("\"test\\n\"", getLLVMStyleWithColumns(7)));
+  EXPECT_EQ("\"tes\\\\\"\n"
+            "\"n\"",
+            format("\"tes\\\\n\"", getLLVMStyleWithColumns(7)));
+  EXPECT_EQ("\"\\\\\\\\\"\n"
+            "\"\\n\"",
+            format("\"\\\\\\\\\\n\"", getLLVMStyleWithColumns(7)));
+  EXPECT_EQ("\"\\uff01\"",
+            format("\"\\uff01\"", getLLVMStyleWithColumns(7)));
+  EXPECT_EQ("\"\\uff01\"\n"
+            "\"test\"",
+            format("\"\\uff01test\"", getLLVMStyleWithColumns(8)));
+  EXPECT_EQ("\"\\Uff01ff02\"",
+            format("\"\\Uff01ff02\"", getLLVMStyleWithColumns(11)));
+  EXPECT_EQ("\"\\x000000000001\"\n"
+            "\"next\"",
+            format("\"\\x000000000001next\"", getLLVMStyleWithColumns(16)));
+  EXPECT_EQ("\"\\x000000000001next\"",
+            format("\"\\x000000000001next\"", getLLVMStyleWithColumns(15)));
+  EXPECT_EQ("\"\\x000000000001\"",
+            format("\"\\x000000000001\"", getLLVMStyleWithColumns(7)));
+  EXPECT_EQ("\"test\"\n"
+            "\"\\000000\"\n"
+            "\"000001\"",
+            format("\"test\\000000000001\"", getLLVMStyleWithColumns(9)));
+  EXPECT_EQ("\"test\\000\"\n"
+            "\"000000001\"",
+            format("\"test\\000000000001\"", getLLVMStyleWithColumns(10)));
+  EXPECT_EQ("R\"(\\x\\x00)\"\n",
+            format("R\"(\\x\\x00)\"\n", getLLVMStyleWithColumns(7)));
+}
+
 } // end namespace tooling
 } // end namespace clang





More information about the cfe-commits mailing list