r187935 - Support for double width characters.

Alexander Kornienko alexfh at google.com
Fri Aug 9 08:12:59 PDT 2013


On Thu, Aug 8, 2013 at 4:10 AM, Daniel Jasper <djasper at google.com> wrote:

> With this patch, clang-format tests assert on Mac. The underlying issue is
> that tab characters are not-printable. Thus, the corresponding function on
> mac (somewhat correctly) return -1 and columnWidth asserts. I think we
> should do two things:
> 1) Move the generic columnWidth location into an accessible location. Then
> directly use that from clang-format. We do not want clang-format to output
> different layouts based on the platform.
>

I'm going to test our custom implementation on MacOSX and get rid of
per-platform definitions for columnWidth and isPrint.


> 2) We probably should not count tab characters as 0-width (as I think we
> know do). Can we instead use the configured IndentWidth?
>

We can't handle  tab characters correctly on this level, as their width is
context-dependent. So columnWidth will continue returning -1 for them (as I
hope we now do), and I'll take a look at what we can do to handle them
correctly.


>
> Cheers,
> Daniel
>
>
> On Wed, Aug 7, 2013 at 4:29 PM, Alexander Kornienko <alexfh at google.com>wrote:
>
>> Author: alexfh
>> Date: Wed Aug  7 18:29:01 2013
>> New Revision: 187935
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=187935&view=rev
>> Log:
>> Support for double width characters.
>>
>> Summary: Only works for UTF-8-encoded files.
>>
>> Reviewers: djasper
>>
>> Reviewed By: djasper
>>
>> CC: cfe-commits, klimek
>>
>> Differential Revision: http://llvm-reviews.chandlerc.com/D1311
>>
>> Modified:
>>     cfe/trunk/lib/Format/BreakableToken.cpp
>>     cfe/trunk/unittests/Format/FormatTest.cpp
>>
>> Modified: cfe/trunk/lib/Format/BreakableToken.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.cpp?rev=187935&r1=187934&r2=187935&view=diff
>>
>> ==============================================================================
>> --- cfe/trunk/lib/Format/BreakableToken.cpp (original)
>> +++ cfe/trunk/lib/Format/BreakableToken.cpp Wed Aug  7 18:29:01 2013
>> @@ -20,6 +20,7 @@
>>  #include "clang/Format/Format.h"
>>  #include "llvm/ADT/STLExtras.h"
>>  #include "llvm/Support/Debug.h"
>> +#include "llvm/Support/Locale.h"
>>  #include <algorithm>
>>
>>  namespace clang {
>> @@ -38,6 +39,15 @@ static bool IsBlank(char C) {
>>    }
>>  }
>>
>> +static unsigned columnWidth(StringRef Text, encoding::Encoding Encoding)
>> {
>> +  if (Encoding == encoding::Encoding_UTF8) {
>> +    int ContentWidth = llvm::sys::locale::columnWidth(Text);
>> +    if (ContentWidth >= 0)
>> +      return ContentWidth;
>> +  }
>> +  return encoding::getCodePointCount(Text, Encoding);
>> +}
>> +
>>  static BreakableToken::Split getCommentSplit(StringRef Text,
>>                                               unsigned ContentStartColumn,
>>                                               unsigned ColumnLimit,
>> @@ -49,9 +59,12 @@ static BreakableToken::Split getCommentS
>>    unsigned MaxSplitBytes = 0;
>>
>>    for (unsigned NumChars = 0;
>> -       NumChars < MaxSplit && MaxSplitBytes < Text.size(); ++NumChars)
>> -    MaxSplitBytes +=
>> +       NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
>> +    unsigned NumBytes =
>>          encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
>> +    NumChars += columnWidth(Text.substr(MaxSplitBytes, NumBytes),
>> Encoding);
>> +    MaxSplitBytes += NumBytes;
>> +  }
>>
>>    StringRef::size_type SpaceOffset = Text.find_last_of(Blanks,
>> MaxSplitBytes);
>>    if (SpaceOffset == StringRef::npos ||
>> @@ -84,9 +97,8 @@ static BreakableToken::Split getStringSp
>>      return BreakableToken::Split(StringRef::npos, 0);
>>    if (ColumnLimit <= ContentStartColumn)
>>      return BreakableToken::Split(StringRef::npos, 0);
>> -  unsigned MaxSplit =
>> -      std::min<unsigned>(ColumnLimit - ContentStartColumn,
>> -                         encoding::getCodePointCount(Text, Encoding) -
>> 1);
>> +  unsigned MaxSplit = std::min<unsigned>(ColumnLimit -
>> ContentStartColumn,
>> +                                         columnWidth(Text, Encoding) -
>> 1);
>>    StringRef::size_type SpaceOffset = 0;
>>    StringRef::size_type SlashOffset = 0;
>>    StringRef::size_type WordStartOffset = 0;
>> @@ -98,7 +110,7 @@ static BreakableToken::Split getStringSp
>>        Chars += Advance;
>>      } else {
>>        Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
>> -      Chars += 1;
>> +      Chars += columnWidth(Text.substr(0, Advance), Encoding);
>>      }
>>
>>      if (Chars > MaxSplit)
>> @@ -131,7 +143,7 @@ unsigned BreakableSingleLineToken::getLi
>>  unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
>>      unsigned LineIndex, unsigned Offset, StringRef::size_type Length)
>> const {
>>    return StartColumn + Prefix.size() + Postfix.size() +
>> -         encoding::getCodePointCount(Line.substr(Offset, Length),
>> Encoding);
>> +         columnWidth(Line.substr(Offset, Length), Encoding);
>>  }
>>
>>  BreakableSingleLineToken::BreakableSingleLineToken(
>> @@ -329,8 +341,7 @@ unsigned BreakableBlockComment::getLineC
>>  unsigned BreakableBlockComment::getLineLengthAfterSplit(
>>      unsigned LineIndex, unsigned Offset, StringRef::size_type Length)
>> const {
>>    return getContentStartColumn(LineIndex, Offset) +
>> -         encoding::getCodePointCount(Lines[LineIndex].substr(Offset,
>> Length),
>> -                                     Encoding) +
>> +         columnWidth(Lines[LineIndex].substr(Offset, Length), Encoding) +
>>           // The last line gets a "*/" postfix.
>>           (LineIndex + 1 == Lines.size() ? 2 : 0);
>>  }
>>
>> Modified: cfe/trunk/unittests/Format/FormatTest.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=187935&r1=187934&r2=187935&view=diff
>>
>> ==============================================================================
>> --- cfe/trunk/unittests/Format/FormatTest.cpp (original)
>> +++ cfe/trunk/unittests/Format/FormatTest.cpp Wed Aug  7 18:29:01 2013
>> @@ -5704,15 +5704,15 @@ TEST_F(FormatTest, CountsUTF8CharactersP
>>    verifyFormat("\"Однажды в Ñ Ñ‚ÑƒÐ´Ñ‘Ð½ÑƒÑŽ зимнюю
>> пору...\"",
>>                 getLLVMStyleWithColumns(35));
>>    verifyFormat("\"一 二 三 å›› 五 å…­ 七 å…« ä¹  å  \"",
>> -               getLLVMStyleWithColumns(21));
>> +               getLLVMStyleWithColumns(31));
>>    verifyFormat("// Однажды в Ñ Ñ‚ÑƒÐ´Ñ‘Ð½ÑƒÑŽ зимнюю
>> пору...",
>>                 getLLVMStyleWithColumns(36));
>>    verifyFormat("// 一 二 三 å›› 五 å…­ 七 å…« ä¹  å  ",
>> -               getLLVMStyleWithColumns(22));
>> +               getLLVMStyleWithColumns(32));
>>    verifyFormat("/* Однажды в Ñ Ñ‚ÑƒÐ´Ñ‘Ð½ÑƒÑŽ зимнюю
>> пору... */",
>>                 getLLVMStyleWithColumns(39));
>>    verifyFormat("/* 一 二 三 å›› 五 å…­ 七 å…« ä¹  å   */",
>> -               getLLVMStyleWithColumns(25));
>> +               getLLVMStyleWithColumns(35));
>>  }
>>
>>  TEST_F(FormatTest, SplitsUTF8Strings) {
>> @@ -5723,11 +5723,12 @@ TEST_F(FormatTest, SplitsUTF8Strings) {
>>        "\"пору,\"",
>>        format("\"Однажды, в Ñ Ñ‚ÑƒÐ´Ñ‘Ð½ÑƒÑŽ зимнюю
>> пору,\"",
>>               getLLVMStyleWithColumns(13)));
>> -  EXPECT_EQ("\"一 二 三 四 \"\n"
>> -            "\"五 六 七 八 \"\n"
>> -            "\"ä¹  å  \"",
>> -            format("\"一 二 三 å›› 五 å…­ 七 å…« ä¹  å  \"",
>> -                   getLLVMStyleWithColumns(10)));
>> +  EXPECT_EQ("\"一 二 三 \"\n"
>> +            "\"四 五六 \"\n"
>> +            "\"七 å…« ä¹  \"\n"
>> +            "\"å  \"",
>> +            format("\"一 二 三 å›› 五六 七 å…« ä¹  å  \"",
>> +                   getLLVMStyleWithColumns(11)));
>>  }
>>
>>  TEST_F(FormatTest, SplitsUTF8LineComments) {
>> @@ -5739,9 +5740,9 @@ TEST_F(FormatTest, SplitsUTF8LineComment
>>                     getLLVMStyleWithColumns(13)));
>>    EXPECT_EQ("// 一二三\n"
>>              "// 四五六七\n"
>> -            "// å…«\n"
>> -            "// ä¹  å  ",
>> -            format("// 一二三 四五六七 å…«  ä¹  å  ",
>> getLLVMStyleWithColumns(6)));
>> +            "// å…«  ä¹ \n"
>> +            "// å  ",
>> +            format("// 一二三 四五六七 å…«  ä¹  å  ",
>> getLLVMStyleWithColumns(9)));
>>  }
>>
>>  TEST_F(FormatTest, SplitsUTF8BlockComments) {
>> @@ -5758,16 +5759,17 @@ TEST_F(FormatTest, SplitsUTF8BlockCommen
>>                     getLLVMStyleWithColumns(13)));
>>    EXPECT_EQ("/* 一二三\n"
>>              " * 四五六七\n"
>> -            " * å…«\n"
>> -            " * ä¹  å  \n"
>> -            " */",
>> -            format("/* 一二三 四五六七 å…«  ä¹  å   */",
>> getLLVMStyleWithColumns(6)));
>> +            " * å…«  ä¹ \n"
>> +            " * å    */",
>> +            format("/* 一二三 四五六七 å…«  ä¹  å    */",
>> getLLVMStyleWithColumns(9)));
>>    EXPECT_EQ("/* 𠓣𠓮𠓼𠓽 𠔣𠔬𠔲𠔯\n"
>>              " * 𠕓𠕪𠕥𠕖\n"
>>              " * 𠖀𠕿𠕱-ð Ÿ  */",
>>              format("/* 𠓣𠓮𠓼𠓽 𠔣𠔬𠔲𠔯 𠕓𠕪𠕥ð
>> •– 𠖀𠕿𠕱-ð Ÿ  */", getLLVMStyleWithColumns(12)));
>>  }
>>
>> +#endif // _MSC_VER
>> +
>>  TEST_F(FormatTest, FormatsWithWebKitStyle) {
>>    FormatStyle Style = getWebKitStyle();
>>
>> @@ -5847,7 +5849,5 @@ TEST_F(FormatTest, FormatsWithWebKitStyl
>>              format("if (aaaaaaaaaaaaaaa || bbbbbbbbbbbbbbb) { i++; }",
>> Style));
>>  }
>>
>> -#endif
>> -
>>  } // end namespace tooling
>>  } // end namespace clang
>>
>>
>> _______________________________________________
>> cfe-commits mailing list
>> cfe-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130809/bcba561c/attachment.html>


More information about the cfe-commits mailing list