<div dir="ltr"><br><div class="gmail_extra"><div class="gmail_quote">On Thu, Aug 8, 2013 at 4:10 AM, Daniel Jasper <span dir="ltr"><<a href="mailto:djasper@google.com" target="_blank">djasper@google.com</a>></span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr">With this patch, clang-format tests assert on Mac. The underlying issue is that tab characters are not-printable. Thus, the corresponding function on mac (somewhat correctly) return -1 and columnWidth asserts. I think we should do two things:<div>

1) Move the generic columnWidth location into an accessible location. Then directly use that from clang-format. We do not want clang-format to output different layouts based on the platform.</div></div></blockquote><div>
<br></div><div>I'm going to test our custom implementation on MacOSX and get rid of per-platform definitions for columnWidth and isPrint.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<div dir="ltr"><div>2) We probably should not count tab characters as 0-width (as I think we know do). Can we instead use the configured IndentWidth?</div></div></blockquote><div><br></div><div>We can't handle  tab characters correctly on this level, as their width is context-dependent. So columnWidth will continue returning -1 for them (as I hope we now do), and I'll take a look at what we can do to handle them correctly.</div>
<div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr">
<div><br></div><div>Cheers,</div><div>Daniel</div></div><div class="HOEnZb"><div class="h5"><div class="gmail_extra"><br><br><div class="gmail_quote">On Wed, Aug 7, 2013 at 4:29 PM, Alexander Kornienko <span dir="ltr"><<a href="mailto:alexfh@google.com" target="_blank">alexfh@google.com</a>></span> wrote:<br>

<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: alexfh<br>
Date: Wed Aug  7 18:29:01 2013<br>
New Revision: 187935<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=187935&view=rev" target="_blank">http://llvm.org/viewvc/llvm-project?rev=187935&view=rev</a><br>
Log:<br>
Support for double width characters.<br>
<br>
Summary: Only works for UTF-8-encoded files.<br>
<br>
Reviewers: djasper<br>
<br>
Reviewed By: djasper<br>
<br>
CC: cfe-commits, klimek<br>
<br>
Differential Revision: <a href="http://llvm-reviews.chandlerc.com/D1311" target="_blank">http://llvm-reviews.chandlerc.com/D1311</a><br>
<br>
Modified:<br>
    cfe/trunk/lib/Format/BreakableToken.cpp<br>
    cfe/trunk/unittests/Format/FormatTest.cpp<br>
<br>
Modified: cfe/trunk/lib/Format/BreakableToken.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.cpp?rev=187935&r1=187934&r2=187935&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.cpp?rev=187935&r1=187934&r2=187935&view=diff</a><br>


==============================================================================<br>
--- cfe/trunk/lib/Format/BreakableToken.cpp (original)<br>
+++ cfe/trunk/lib/Format/BreakableToken.cpp Wed Aug  7 18:29:01 2013<br>
@@ -20,6 +20,7 @@<br>
 #include "clang/Format/Format.h"<br>
 #include "llvm/ADT/STLExtras.h"<br>
 #include "llvm/Support/Debug.h"<br>
+#include "llvm/Support/Locale.h"<br>
 #include <algorithm><br>
<br>
 namespace clang {<br>
@@ -38,6 +39,15 @@ static bool IsBlank(char C) {<br>
   }<br>
 }<br>
<br>
+static unsigned columnWidth(StringRef Text, encoding::Encoding Encoding) {<br>
+  if (Encoding == encoding::Encoding_UTF8) {<br>
+    int ContentWidth = llvm::sys::locale::columnWidth(Text);<br>
+    if (ContentWidth >= 0)<br>
+      return ContentWidth;<br>
+  }<br>
+  return encoding::getCodePointCount(Text, Encoding);<br>
+}<br>
+<br>
 static BreakableToken::Split getCommentSplit(StringRef Text,<br>
                                              unsigned ContentStartColumn,<br>
                                              unsigned ColumnLimit,<br>
@@ -49,9 +59,12 @@ static BreakableToken::Split getCommentS<br>
   unsigned MaxSplitBytes = 0;<br>
<br>
   for (unsigned NumChars = 0;<br>
-       NumChars < MaxSplit && MaxSplitBytes < Text.size(); ++NumChars)<br>
-    MaxSplitBytes +=<br>
+       NumChars < MaxSplit && MaxSplitBytes < Text.size();) {<br>
+    unsigned NumBytes =<br>
         encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);<br>
+    NumChars += columnWidth(Text.substr(MaxSplitBytes, NumBytes), Encoding);<br>
+    MaxSplitBytes += NumBytes;<br>
+  }<br>
<br>
   StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);<br>
   if (SpaceOffset == StringRef::npos ||<br>
@@ -84,9 +97,8 @@ static BreakableToken::Split getStringSp<br>
     return BreakableToken::Split(StringRef::npos, 0);<br>
   if (ColumnLimit <= ContentStartColumn)<br>
     return BreakableToken::Split(StringRef::npos, 0);<br>
-  unsigned MaxSplit =<br>
-      std::min<unsigned>(ColumnLimit - ContentStartColumn,<br>
-                         encoding::getCodePointCount(Text, Encoding) - 1);<br>
+  unsigned MaxSplit = std::min<unsigned>(ColumnLimit - ContentStartColumn,<br>
+                                         columnWidth(Text, Encoding) - 1);<br>
   StringRef::size_type SpaceOffset = 0;<br>
   StringRef::size_type SlashOffset = 0;<br>
   StringRef::size_type WordStartOffset = 0;<br>
@@ -98,7 +110,7 @@ static BreakableToken::Split getStringSp<br>
       Chars += Advance;<br>
     } else {<br>
       Advance = encoding::getCodePointNumBytes(Text[0], Encoding);<br>
-      Chars += 1;<br>
+      Chars += columnWidth(Text.substr(0, Advance), Encoding);<br>
     }<br>
<br>
     if (Chars > MaxSplit)<br>
@@ -131,7 +143,7 @@ unsigned BreakableSingleLineToken::getLi<br>
 unsigned BreakableSingleLineToken::getLineLengthAfterSplit(<br>
     unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {<br>
   return StartColumn + Prefix.size() + Postfix.size() +<br>
-         encoding::getCodePointCount(Line.substr(Offset, Length), Encoding);<br>
+         columnWidth(Line.substr(Offset, Length), Encoding);<br>
 }<br>
<br>
 BreakableSingleLineToken::BreakableSingleLineToken(<br>
@@ -329,8 +341,7 @@ unsigned BreakableBlockComment::getLineC<br>
 unsigned BreakableBlockComment::getLineLengthAfterSplit(<br>
     unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {<br>
   return getContentStartColumn(LineIndex, Offset) +<br>
-         encoding::getCodePointCount(Lines[LineIndex].substr(Offset, Length),<br>
-                                     Encoding) +<br>
+         columnWidth(Lines[LineIndex].substr(Offset, Length), Encoding) +<br>
          // The last line gets a "*/" postfix.<br>
          (LineIndex + 1 == Lines.size() ? 2 : 0);<br>
 }<br>
<br>
Modified: cfe/trunk/unittests/Format/FormatTest.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=187935&r1=187934&r2=187935&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=187935&r1=187934&r2=187935&view=diff</a><br>


==============================================================================<br>
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)<br>
+++ cfe/trunk/unittests/Format/FormatTest.cpp Wed Aug  7 18:29:01 2013<br>
@@ -5704,15 +5704,15 @@ TEST_F(FormatTest, CountsUTF8CharactersP<br>
   verifyFormat("\"Однажды Ð² Ñ Ñ‚удёную Ð·Ð¸Ð¼Ð½ÑŽÑŽ Ð¿Ð¾Ñ€Ñƒ...\"",<br>
                getLLVMStyleWithColumns(35));<br>
   verifyFormat("\"一 äºŒ ä¸‰ å›› äº” å…­ ä¸ƒ å…« ä¹  å  \"",<br>
-               getLLVMStyleWithColumns(21));<br>
+               getLLVMStyleWithColumns(31));<br>
   verifyFormat("// ÐžÐ´Ð½Ð°Ð¶Ð´Ñ‹ Ð² Ñ Ñ‚удёную Ð·Ð¸Ð¼Ð½ÑŽÑŽ Ð¿Ð¾Ñ€Ñƒ...",<br>
                getLLVMStyleWithColumns(36));<br>
   verifyFormat("// ä¸€ äºŒ ä¸‰ å›› äº” å…­ ä¸ƒ å…« ä¹  å  ",<br>
-               getLLVMStyleWithColumns(22));<br>
+               getLLVMStyleWithColumns(32));<br>
   verifyFormat("/* ÐžÐ´Ð½Ð°Ð¶Ð´Ñ‹ Ð² Ñ Ñ‚удёную Ð·Ð¸Ð¼Ð½ÑŽÑŽ Ð¿Ð¾Ñ€Ñƒ... */",<br>
                getLLVMStyleWithColumns(39));<br>
   verifyFormat("/* ä¸€ äºŒ ä¸‰ å›› äº” å…­ ä¸ƒ å…« ä¹  å   */",<br>
-               getLLVMStyleWithColumns(25));<br>
+               getLLVMStyleWithColumns(35));<br>
 }<br>
<br>
 TEST_F(FormatTest, SplitsUTF8Strings) {<br>
@@ -5723,11 +5723,12 @@ TEST_F(FormatTest, SplitsUTF8Strings) {<br>
       "\"пору,\"",<br>
       format("\"Однажды, Ð² Ñ Ñ‚удёную Ð·Ð¸Ð¼Ð½ÑŽÑŽ Ð¿Ð¾Ñ€Ñƒ,\"",<br>
              getLLVMStyleWithColumns(13)));<br>
-  EXPECT_EQ("\"一 äºŒ ä¸‰ å›› \"\n"<br>
-            "\"五 å…­ ä¸ƒ å…« \"\n"<br>
-            "\"ä¹  å  \"",<br>
-            format("\"一 äºŒ ä¸‰ å›› äº” å…­ ä¸ƒ å…« ä¹  å  \"",<br>
-                   getLLVMStyleWithColumns(10)));<br>
+  EXPECT_EQ("\"一 äºŒ ä¸‰ \"\n"<br>
+            "\"å›› äº”å…­ \"\n"<br>
+            "\"七 å…« ä¹  \"\n"<br>
+            "\"å  \"",<br>
+            format("\"一 äºŒ ä¸‰ å›› äº”å…­ ä¸ƒ å…« ä¹  å  \"",<br>
+                   getLLVMStyleWithColumns(11)));<br>
 }<br>
<br>
 TEST_F(FormatTest, SplitsUTF8LineComments) {<br>
@@ -5739,9 +5740,9 @@ TEST_F(FormatTest, SplitsUTF8LineComment<br>
                    getLLVMStyleWithColumns(13)));<br>
   EXPECT_EQ("// ä¸€äºŒä¸‰\n"<br>
             "// å››äº”六七\n"<br>
-            "// å…«\n"<br>
-            "// ä¹  å  ",<br>
-            format("// ä¸€äºŒä¸‰ å››äº”六七 å…«  ä¹  å  ", getLLVMStyleWithColumns(6)));<br>
+            "// å…«  ä¹ \n"<br>
+            "// å  ",<br>
+            format("// ä¸€äºŒä¸‰ å››äº”六七 å…«  ä¹  å  ", getLLVMStyleWithColumns(9)));<br>
 }<br>
<br>
 TEST_F(FormatTest, SplitsUTF8BlockComments) {<br>
@@ -5758,16 +5759,17 @@ TEST_F(FormatTest, SplitsUTF8BlockCommen<br>
                    getLLVMStyleWithColumns(13)));<br>
   EXPECT_EQ("/* ä¸€äºŒä¸‰\n"<br>
             " * å››äº”六七\n"<br>
-            " * å…«\n"<br>
-            " * ä¹  å  \n"<br>
-            " */",<br>
-            format("/* ä¸€äºŒä¸‰ å››äº”六七 å…«  ä¹  å   */", getLLVMStyleWithColumns(6)));<br>
+            " * å…«  ä¹ \n"<br>
+            " * å    */",<br>
+            format("/* ä¸€äºŒä¸‰ å››äº”六七 å…«  ä¹  å    */", getLLVMStyleWithColumns(9)));<br>
   EXPECT_EQ("/* ð “£ð “®ð “¼ð “½ ð ”£ð ”¬ð ”²ð ”¯\n"<br>
             " * ð •“𠕪𠕥𠕖\n"<br>
             " * ð –€ð •¿ð •±-ð Ÿ  */",<br>
             format("/* ð “£ð “®ð “¼ð “½ ð ”£ð ”¬ð ”²ð ”¯ ð •“𠕪𠕥𠕖 ð –€ð •¿ð •±-ð Ÿ  */", getLLVMStyleWithColumns(12)));<br>
 }<br>
<br>
+#endif // _MSC_VER<br>
+<br>
 TEST_F(FormatTest, FormatsWithWebKitStyle) {<br>
   FormatStyle Style = getWebKitStyle();<br>
<br>
@@ -5847,7 +5849,5 @@ TEST_F(FormatTest, FormatsWithWebKitStyl<br>
             format("if (aaaaaaaaaaaaaaa || bbbbbbbbbbbbbbb) { i++; }", Style));<br>
 }<br>
<br>
-#endif<br>
-<br>
 } // end namespace tooling<br>
 } // end namespace clang<br>
<br>
<br>
_______________________________________________<br>
cfe-commits mailing list<br>
<a href="mailto:cfe-commits@cs.uiuc.edu" target="_blank">cfe-commits@cs.uiuc.edu</a><br>
<a href="http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits</a><br>
</blockquote></div><br></div>
</div></div></blockquote></div><br>
</div></div>