r187935 - Support for double width characters.
Alexander Kornienko
alexfh at google.com
Fri Aug 9 08:12:59 PDT 2013
On Thu, Aug 8, 2013 at 4:10 AM, Daniel Jasper <djasper at google.com> wrote:
> With this patch, clang-format tests assert on Mac. The underlying issue is
> that tab characters are not-printable. Thus, the corresponding function on
> mac (somewhat correctly) return -1 and columnWidth asserts. I think we
> should do two things:
> 1) Move the generic columnWidth location into an accessible location. Then
> directly use that from clang-format. We do not want clang-format to output
> different layouts based on the platform.
>
I'm going to test our custom implementation on MacOSX and get rid of
per-platform definitions for columnWidth and isPrint.
> 2) We probably should not count tab characters as 0-width (as I think we
> know do). Can we instead use the configured IndentWidth?
>
We can't handle tab characters correctly on this level, as their width is
context-dependent. So columnWidth will continue returning -1 for them (as I
hope we now do), and I'll take a look at what we can do to handle them
correctly.
>
> Cheers,
> Daniel
>
>
> On Wed, Aug 7, 2013 at 4:29 PM, Alexander Kornienko <alexfh at google.com>wrote:
>
>> Author: alexfh
>> Date: Wed Aug 7 18:29:01 2013
>> New Revision: 187935
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=187935&view=rev
>> Log:
>> Support for double width characters.
>>
>> Summary: Only works for UTF-8-encoded files.
>>
>> Reviewers: djasper
>>
>> Reviewed By: djasper
>>
>> CC: cfe-commits, klimek
>>
>> Differential Revision: http://llvm-reviews.chandlerc.com/D1311
>>
>> Modified:
>> cfe/trunk/lib/Format/BreakableToken.cpp
>> cfe/trunk/unittests/Format/FormatTest.cpp
>>
>> Modified: cfe/trunk/lib/Format/BreakableToken.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.cpp?rev=187935&r1=187934&r2=187935&view=diff
>>
>> ==============================================================================
>> --- cfe/trunk/lib/Format/BreakableToken.cpp (original)
>> +++ cfe/trunk/lib/Format/BreakableToken.cpp Wed Aug 7 18:29:01 2013
>> @@ -20,6 +20,7 @@
>> #include "clang/Format/Format.h"
>> #include "llvm/ADT/STLExtras.h"
>> #include "llvm/Support/Debug.h"
>> +#include "llvm/Support/Locale.h"
>> #include <algorithm>
>>
>> namespace clang {
>> @@ -38,6 +39,15 @@ static bool IsBlank(char C) {
>> }
>> }
>>
>> +static unsigned columnWidth(StringRef Text, encoding::Encoding Encoding)
>> {
>> + if (Encoding == encoding::Encoding_UTF8) {
>> + int ContentWidth = llvm::sys::locale::columnWidth(Text);
>> + if (ContentWidth >= 0)
>> + return ContentWidth;
>> + }
>> + return encoding::getCodePointCount(Text, Encoding);
>> +}
>> +
>> static BreakableToken::Split getCommentSplit(StringRef Text,
>> unsigned ContentStartColumn,
>> unsigned ColumnLimit,
>> @@ -49,9 +59,12 @@ static BreakableToken::Split getCommentS
>> unsigned MaxSplitBytes = 0;
>>
>> for (unsigned NumChars = 0;
>> - NumChars < MaxSplit && MaxSplitBytes < Text.size(); ++NumChars)
>> - MaxSplitBytes +=
>> + NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
>> + unsigned NumBytes =
>> encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
>> + NumChars += columnWidth(Text.substr(MaxSplitBytes, NumBytes),
>> Encoding);
>> + MaxSplitBytes += NumBytes;
>> + }
>>
>> StringRef::size_type SpaceOffset = Text.find_last_of(Blanks,
>> MaxSplitBytes);
>> if (SpaceOffset == StringRef::npos ||
>> @@ -84,9 +97,8 @@ static BreakableToken::Split getStringSp
>> return BreakableToken::Split(StringRef::npos, 0);
>> if (ColumnLimit <= ContentStartColumn)
>> return BreakableToken::Split(StringRef::npos, 0);
>> - unsigned MaxSplit =
>> - std::min<unsigned>(ColumnLimit - ContentStartColumn,
>> - encoding::getCodePointCount(Text, Encoding) -
>> 1);
>> + unsigned MaxSplit = std::min<unsigned>(ColumnLimit -
>> ContentStartColumn,
>> + columnWidth(Text, Encoding) -
>> 1);
>> StringRef::size_type SpaceOffset = 0;
>> StringRef::size_type SlashOffset = 0;
>> StringRef::size_type WordStartOffset = 0;
>> @@ -98,7 +110,7 @@ static BreakableToken::Split getStringSp
>> Chars += Advance;
>> } else {
>> Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
>> - Chars += 1;
>> + Chars += columnWidth(Text.substr(0, Advance), Encoding);
>> }
>>
>> if (Chars > MaxSplit)
>> @@ -131,7 +143,7 @@ unsigned BreakableSingleLineToken::getLi
>> unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
>> unsigned LineIndex, unsigned Offset, StringRef::size_type Length)
>> const {
>> return StartColumn + Prefix.size() + Postfix.size() +
>> - encoding::getCodePointCount(Line.substr(Offset, Length),
>> Encoding);
>> + columnWidth(Line.substr(Offset, Length), Encoding);
>> }
>>
>> BreakableSingleLineToken::BreakableSingleLineToken(
>> @@ -329,8 +341,7 @@ unsigned BreakableBlockComment::getLineC
>> unsigned BreakableBlockComment::getLineLengthAfterSplit(
>> unsigned LineIndex, unsigned Offset, StringRef::size_type Length)
>> const {
>> return getContentStartColumn(LineIndex, Offset) +
>> - encoding::getCodePointCount(Lines[LineIndex].substr(Offset,
>> Length),
>> - Encoding) +
>> + columnWidth(Lines[LineIndex].substr(Offset, Length), Encoding) +
>> // The last line gets a "*/" postfix.
>> (LineIndex + 1 == Lines.size() ? 2 : 0);
>> }
>>
>> Modified: cfe/trunk/unittests/Format/FormatTest.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=187935&r1=187934&r2=187935&view=diff
>>
>> ==============================================================================
>> --- cfe/trunk/unittests/Format/FormatTest.cpp (original)
>> +++ cfe/trunk/unittests/Format/FormatTest.cpp Wed Aug 7 18:29:01 2013
>> @@ -5704,15 +5704,15 @@ TEST_F(FormatTest, CountsUTF8CharactersP
>> verifyFormat("\"Однажды в Ñ Ñ‚ÑƒÐ´Ñ‘Ð½ÑƒÑŽ зимнюю
>> пору...\"",
>> getLLVMStyleWithColumns(35));
>> verifyFormat("\"一 二 三 å›› 五 å… ä¸ƒ å…« ä¹ å \"",
>> - getLLVMStyleWithColumns(21));
>> + getLLVMStyleWithColumns(31));
>> verifyFormat("// Однажды в Ñ Ñ‚ÑƒÐ´Ñ‘Ð½ÑƒÑŽ зимнюю
>> пору...",
>> getLLVMStyleWithColumns(36));
>> verifyFormat("// 一 二 三 å›› 五 å… ä¸ƒ å…« ä¹ å ",
>> - getLLVMStyleWithColumns(22));
>> + getLLVMStyleWithColumns(32));
>> verifyFormat("/* Однажды в Ñ Ñ‚ÑƒÐ´Ñ‘Ð½ÑƒÑŽ зимнюю
>> пору... */",
>> getLLVMStyleWithColumns(39));
>> verifyFormat("/* 一 二 三 å›› 五 å… ä¸ƒ å…« ä¹ å */",
>> - getLLVMStyleWithColumns(25));
>> + getLLVMStyleWithColumns(35));
>> }
>>
>> TEST_F(FormatTest, SplitsUTF8Strings) {
>> @@ -5723,11 +5723,12 @@ TEST_F(FormatTest, SplitsUTF8Strings) {
>> "\"пору,\"",
>> format("\"Однажды, в Ñ Ñ‚ÑƒÐ´Ñ‘Ð½ÑƒÑŽ зимнюю
>> пору,\"",
>> getLLVMStyleWithColumns(13)));
>> - EXPECT_EQ("\"一 二 三 四 \"\n"
>> - "\"五 å… ä¸ƒ å…« \"\n"
>> - "\"ä¹ å \"",
>> - format("\"一 二 三 å›› 五 å… ä¸ƒ å…« ä¹ å \"",
>> - getLLVMStyleWithColumns(10)));
>> + EXPECT_EQ("\"一 二 三 \"\n"
>> + "\"å›› äº”å… \"\n"
>> + "\"七 å…« ä¹ \"\n"
>> + "\"å \"",
>> + format("\"一 二 三 å›› äº”å… ä¸ƒ å…« ä¹ å \"",
>> + getLLVMStyleWithColumns(11)));
>> }
>>
>> TEST_F(FormatTest, SplitsUTF8LineComments) {
>> @@ -5739,9 +5740,9 @@ TEST_F(FormatTest, SplitsUTF8LineComment
>> getLLVMStyleWithColumns(13)));
>> EXPECT_EQ("// 一二三\n"
>> "// 四五å…七\n"
>> - "// å…«\n"
>> - "// ä¹ å ",
>> - format("// 一二三 四五å…七 å…« ä¹ å ",
>> getLLVMStyleWithColumns(6)));
>> + "// å…« ä¹ \n"
>> + "// å ",
>> + format("// 一二三 四五å…七 å…« ä¹ å ",
>> getLLVMStyleWithColumns(9)));
>> }
>>
>> TEST_F(FormatTest, SplitsUTF8BlockComments) {
>> @@ -5758,16 +5759,17 @@ TEST_F(FormatTest, SplitsUTF8BlockCommen
>> getLLVMStyleWithColumns(13)));
>> EXPECT_EQ("/* 一二三\n"
>> " * 四五å…七\n"
>> - " * å…«\n"
>> - " * ä¹ å \n"
>> - " */",
>> - format("/* 一二三 四五å…七 å…« ä¹ å */",
>> getLLVMStyleWithColumns(6)));
>> + " * å…« ä¹ \n"
>> + " * å */",
>> + format("/* 一二三 四五å…七 å…« ä¹ å */",
>> getLLVMStyleWithColumns(9)));
>> EXPECT_EQ("/* 𠓣𠓮𠓼𠓽 𠔣𠔬𠔲𠔯\n"
>> " * 𠕓𠕪𠕥𠕖\n"
>> " * 𠖀𠕿𠕱-ð Ÿ */",
>> format("/* 𠓣𠓮𠓼𠓽 𠔣𠔬𠔲𠔯 𠕓𠕪𠕥ð
>> •– 𠖀𠕿𠕱-ð Ÿ */", getLLVMStyleWithColumns(12)));
>> }
>>
>> +#endif // _MSC_VER
>> +
>> TEST_F(FormatTest, FormatsWithWebKitStyle) {
>> FormatStyle Style = getWebKitStyle();
>>
>> @@ -5847,7 +5849,5 @@ TEST_F(FormatTest, FormatsWithWebKitStyl
>> format("if (aaaaaaaaaaaaaaa || bbbbbbbbbbbbbbb) { i++; }",
>> Style));
>> }
>>
>> -#endif
>> -
>> } // end namespace tooling
>> } // end namespace clang
>>
>>
>> _______________________________________________
>> cfe-commits mailing list
>> cfe-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130809/bcba561c/attachment.html>
More information about the cfe-commits
mailing list