<div dir="ltr"><br><div class="gmail_extra"><div class="gmail_quote">On Thu, Aug 8, 2013 at 4:10 AM, Daniel Jasper <span dir="ltr"><<a href="mailto:djasper@google.com" target="_blank">djasper@google.com</a>></span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr">With this patch, clang-format tests assert on Mac. The underlying issue is that tab characters are not-printable. Thus, the corresponding function on mac (somewhat correctly) return -1 and columnWidth asserts. I think we should do two things:<div>
1) Move the generic columnWidth location into an accessible location. Then directly use that from clang-format. We do not want clang-format to output different layouts based on the platform.</div></div></blockquote><div>
<br></div><div>I'm going to test our custom implementation on MacOSX and get rid of per-platform definitions for columnWidth and isPrint.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<div dir="ltr"><div>2) We probably should not count tab characters as 0-width (as I think we know do). Can we instead use the configured IndentWidth?</div></div></blockquote><div><br></div><div>We can't handle tab characters correctly on this level, as their width is context-dependent. So columnWidth will continue returning -1 for them (as I hope we now do), and I'll take a look at what we can do to handle them correctly.</div>
<div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr">
<div><br></div><div>Cheers,</div><div>Daniel</div></div><div class="HOEnZb"><div class="h5"><div class="gmail_extra"><br><br><div class="gmail_quote">On Wed, Aug 7, 2013 at 4:29 PM, Alexander Kornienko <span dir="ltr"><<a href="mailto:alexfh@google.com" target="_blank">alexfh@google.com</a>></span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: alexfh<br>
Date: Wed Aug 7 18:29:01 2013<br>
New Revision: 187935<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=187935&view=rev" target="_blank">http://llvm.org/viewvc/llvm-project?rev=187935&view=rev</a><br>
Log:<br>
Support for double width characters.<br>
<br>
Summary: Only works for UTF-8-encoded files.<br>
<br>
Reviewers: djasper<br>
<br>
Reviewed By: djasper<br>
<br>
CC: cfe-commits, klimek<br>
<br>
Differential Revision: <a href="http://llvm-reviews.chandlerc.com/D1311" target="_blank">http://llvm-reviews.chandlerc.com/D1311</a><br>
<br>
Modified:<br>
cfe/trunk/lib/Format/BreakableToken.cpp<br>
cfe/trunk/unittests/Format/FormatTest.cpp<br>
<br>
Modified: cfe/trunk/lib/Format/BreakableToken.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.cpp?rev=187935&r1=187934&r2=187935&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.cpp?rev=187935&r1=187934&r2=187935&view=diff</a><br>
==============================================================================<br>
--- cfe/trunk/lib/Format/BreakableToken.cpp (original)<br>
+++ cfe/trunk/lib/Format/BreakableToken.cpp Wed Aug 7 18:29:01 2013<br>
@@ -20,6 +20,7 @@<br>
#include "clang/Format/Format.h"<br>
#include "llvm/ADT/STLExtras.h"<br>
#include "llvm/Support/Debug.h"<br>
+#include "llvm/Support/Locale.h"<br>
#include <algorithm><br>
<br>
namespace clang {<br>
@@ -38,6 +39,15 @@ static bool IsBlank(char C) {<br>
}<br>
}<br>
<br>
+static unsigned columnWidth(StringRef Text, encoding::Encoding Encoding) {<br>
+ if (Encoding == encoding::Encoding_UTF8) {<br>
+ int ContentWidth = llvm::sys::locale::columnWidth(Text);<br>
+ if (ContentWidth >= 0)<br>
+ return ContentWidth;<br>
+ }<br>
+ return encoding::getCodePointCount(Text, Encoding);<br>
+}<br>
+<br>
static BreakableToken::Split getCommentSplit(StringRef Text,<br>
unsigned ContentStartColumn,<br>
unsigned ColumnLimit,<br>
@@ -49,9 +59,12 @@ static BreakableToken::Split getCommentS<br>
unsigned MaxSplitBytes = 0;<br>
<br>
for (unsigned NumChars = 0;<br>
- NumChars < MaxSplit && MaxSplitBytes < Text.size(); ++NumChars)<br>
- MaxSplitBytes +=<br>
+ NumChars < MaxSplit && MaxSplitBytes < Text.size();) {<br>
+ unsigned NumBytes =<br>
encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);<br>
+ NumChars += columnWidth(Text.substr(MaxSplitBytes, NumBytes), Encoding);<br>
+ MaxSplitBytes += NumBytes;<br>
+ }<br>
<br>
StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);<br>
if (SpaceOffset == StringRef::npos ||<br>
@@ -84,9 +97,8 @@ static BreakableToken::Split getStringSp<br>
return BreakableToken::Split(StringRef::npos, 0);<br>
if (ColumnLimit <= ContentStartColumn)<br>
return BreakableToken::Split(StringRef::npos, 0);<br>
- unsigned MaxSplit =<br>
- std::min<unsigned>(ColumnLimit - ContentStartColumn,<br>
- encoding::getCodePointCount(Text, Encoding) - 1);<br>
+ unsigned MaxSplit = std::min<unsigned>(ColumnLimit - ContentStartColumn,<br>
+ columnWidth(Text, Encoding) - 1);<br>
StringRef::size_type SpaceOffset = 0;<br>
StringRef::size_type SlashOffset = 0;<br>
StringRef::size_type WordStartOffset = 0;<br>
@@ -98,7 +110,7 @@ static BreakableToken::Split getStringSp<br>
Chars += Advance;<br>
} else {<br>
Advance = encoding::getCodePointNumBytes(Text[0], Encoding);<br>
- Chars += 1;<br>
+ Chars += columnWidth(Text.substr(0, Advance), Encoding);<br>
}<br>
<br>
if (Chars > MaxSplit)<br>
@@ -131,7 +143,7 @@ unsigned BreakableSingleLineToken::getLi<br>
unsigned BreakableSingleLineToken::getLineLengthAfterSplit(<br>
unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {<br>
return StartColumn + Prefix.size() + Postfix.size() +<br>
- encoding::getCodePointCount(Line.substr(Offset, Length), Encoding);<br>
+ columnWidth(Line.substr(Offset, Length), Encoding);<br>
}<br>
<br>
BreakableSingleLineToken::BreakableSingleLineToken(<br>
@@ -329,8 +341,7 @@ unsigned BreakableBlockComment::getLineC<br>
unsigned BreakableBlockComment::getLineLengthAfterSplit(<br>
unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {<br>
return getContentStartColumn(LineIndex, Offset) +<br>
- encoding::getCodePointCount(Lines[LineIndex].substr(Offset, Length),<br>
- Encoding) +<br>
+ columnWidth(Lines[LineIndex].substr(Offset, Length), Encoding) +<br>
// The last line gets a "*/" postfix.<br>
(LineIndex + 1 == Lines.size() ? 2 : 0);<br>
}<br>
<br>
Modified: cfe/trunk/unittests/Format/FormatTest.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=187935&r1=187934&r2=187935&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=187935&r1=187934&r2=187935&view=diff</a><br>
==============================================================================<br>
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)<br>
+++ cfe/trunk/unittests/Format/FormatTest.cpp Wed Aug 7 18:29:01 2013<br>
@@ -5704,15 +5704,15 @@ TEST_F(FormatTest, CountsUTF8CharactersP<br>
verifyFormat("\"Однажды в Ñ Ñ‚ÑƒÐ´Ñ‘Ð½ÑƒÑŽ зимнюю пору...\"",<br>
getLLVMStyleWithColumns(35));<br>
verifyFormat("\"一 二 三 å›› 五 å… ä¸ƒ å…« ä¹ å \"",<br>
- getLLVMStyleWithColumns(21));<br>
+ getLLVMStyleWithColumns(31));<br>
verifyFormat("// Однажды в Ñ Ñ‚ÑƒÐ´Ñ‘Ð½ÑƒÑŽ зимнюю пору...",<br>
getLLVMStyleWithColumns(36));<br>
verifyFormat("// 一 二 三 å›› 五 å… ä¸ƒ å…« ä¹ å ",<br>
- getLLVMStyleWithColumns(22));<br>
+ getLLVMStyleWithColumns(32));<br>
verifyFormat("/* Однажды в Ñ Ñ‚ÑƒÐ´Ñ‘Ð½ÑƒÑŽ зимнюю пору... */",<br>
getLLVMStyleWithColumns(39));<br>
verifyFormat("/* 一 二 三 å›› 五 å… ä¸ƒ å…« ä¹ å */",<br>
- getLLVMStyleWithColumns(25));<br>
+ getLLVMStyleWithColumns(35));<br>
}<br>
<br>
TEST_F(FormatTest, SplitsUTF8Strings) {<br>
@@ -5723,11 +5723,12 @@ TEST_F(FormatTest, SplitsUTF8Strings) {<br>
"\"пору,\"",<br>
format("\"Однажды, в Ñ Ñ‚ÑƒÐ´Ñ‘Ð½ÑƒÑŽ зимнюю пору,\"",<br>
getLLVMStyleWithColumns(13)));<br>
- EXPECT_EQ("\"一 二 三 四 \"\n"<br>
- "\"五 å… ä¸ƒ å…« \"\n"<br>
- "\"ä¹ å \"",<br>
- format("\"一 二 三 å›› 五 å… ä¸ƒ å…« ä¹ å \"",<br>
- getLLVMStyleWithColumns(10)));<br>
+ EXPECT_EQ("\"一 二 三 \"\n"<br>
+ "\"å›› äº”å… \"\n"<br>
+ "\"七 å…« ä¹ \"\n"<br>
+ "\"å \"",<br>
+ format("\"一 二 三 å›› äº”å… ä¸ƒ å…« ä¹ å \"",<br>
+ getLLVMStyleWithColumns(11)));<br>
}<br>
<br>
TEST_F(FormatTest, SplitsUTF8LineComments) {<br>
@@ -5739,9 +5740,9 @@ TEST_F(FormatTest, SplitsUTF8LineComment<br>
getLLVMStyleWithColumns(13)));<br>
EXPECT_EQ("// 一二三\n"<br>
"// 四五å…七\n"<br>
- "// å…«\n"<br>
- "// ä¹ å ",<br>
- format("// 一二三 四五å…七 å…« ä¹ å ", getLLVMStyleWithColumns(6)));<br>
+ "// å…« ä¹ \n"<br>
+ "// å ",<br>
+ format("// 一二三 四五å…七 å…« ä¹ å ", getLLVMStyleWithColumns(9)));<br>
}<br>
<br>
TEST_F(FormatTest, SplitsUTF8BlockComments) {<br>
@@ -5758,16 +5759,17 @@ TEST_F(FormatTest, SplitsUTF8BlockCommen<br>
getLLVMStyleWithColumns(13)));<br>
EXPECT_EQ("/* 一二三\n"<br>
" * 四五å…七\n"<br>
- " * å…«\n"<br>
- " * ä¹ å \n"<br>
- " */",<br>
- format("/* 一二三 四五å…七 å…« ä¹ å */", getLLVMStyleWithColumns(6)));<br>
+ " * å…« ä¹ \n"<br>
+ " * å */",<br>
+ format("/* 一二三 四五å…七 å…« ä¹ å */", getLLVMStyleWithColumns(9)));<br>
EXPECT_EQ("/* 𠓣𠓮𠓼𠓽 𠔣𠔬𠔲𠔯\n"<br>
" * 𠕓𠕪𠕥𠕖\n"<br>
" * 𠖀𠕿𠕱-ð Ÿ */",<br>
format("/* 𠓣𠓮𠓼𠓽 𠔣𠔬𠔲𠔯 𠕓𠕪𠕥𠕖 𠖀𠕿𠕱-ð Ÿ */", getLLVMStyleWithColumns(12)));<br>
}<br>
<br>
+#endif // _MSC_VER<br>
+<br>
TEST_F(FormatTest, FormatsWithWebKitStyle) {<br>
FormatStyle Style = getWebKitStyle();<br>
<br>
@@ -5847,7 +5849,5 @@ TEST_F(FormatTest, FormatsWithWebKitStyl<br>
format("if (aaaaaaaaaaaaaaa || bbbbbbbbbbbbbbb) { i++; }", Style));<br>
}<br>
<br>
-#endif<br>
-<br>
} // end namespace tooling<br>
} // end namespace clang<br>
<br>
<br>
_______________________________________________<br>
cfe-commits mailing list<br>
<a href="mailto:cfe-commits@cs.uiuc.edu" target="_blank">cfe-commits@cs.uiuc.edu</a><br>
<a href="http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits</a><br>
</blockquote></div><br></div>
</div></div></blockquote></div><br>
</div></div>